diff --git a/lib/spack/spack/binary_distribution.py b/lib/spack/spack/binary_distribution.py index b712e82cc0..7ae91dd6bf 100644 --- a/lib/spack/spack/binary_distribution.py +++ b/lib/spack/spack/binary_distribution.py @@ -23,7 +23,7 @@ import llnl.util.filesystem as fsys import llnl.util.lang import llnl.util.tty as tty -from llnl.util.filesystem import mkdirp +from llnl.util.filesystem import BaseDirectoryVisitor, mkdirp, visit_directory_tree import spack.cmd import spack.config as config @@ -642,6 +642,51 @@ def read_buildinfo_file(prefix): return buildinfo +class BuildManifestVisitor(BaseDirectoryVisitor): + """Visitor that collects a list of files and symlinks + that can be checked for need of relocation. It knows how + to dedupe hardlinks and deal with symlinks to files and + directories.""" + + def __init__(self): + # Save unique identifiers of files to avoid + # relocating hardlink files for each path. + self.visited = set() + + # Lists of files we will check + self.files = [] + self.symlinks = [] + + def seen_before(self, root, rel_path): + stat_result = os.lstat(os.path.join(root, rel_path)) + identifier = (stat_result.st_dev, stat_result.st_ino) + if identifier in self.visited: + return True + else: + self.visited.add(identifier) + return False + + def visit_file(self, root, rel_path, depth): + if self.seen_before(root, rel_path): + return + self.files.append(rel_path) + + def visit_symlinked_file(self, root, rel_path, depth): + # Note: symlinks *can* be hardlinked, but it is unclear if + # symlinks can be relinked in-place (preserving inode). + # Therefore, we do *not* de-dupe hardlinked symlinks. + self.symlinks.append(rel_path) + + def before_visit_dir(self, root, rel_path, depth): + return os.path.basename(rel_path) not in (".spack", "man") + + def before_visit_symlinked_dir(self, root, rel_path, depth): + # Treat symlinked directories simply as symlinks. + self.visit_symlinked_file(root, rel_path, depth) + # Never recurse into symlinked directories. + return False + + def get_buildfile_manifest(spec): """ Return a data structure with information about a build, including @@ -657,57 +702,52 @@ def get_buildfile_manifest(spec): "link_to_relocate": [], "other": [], "binary_to_relocate_fullpath": [], + "hardlinks_deduped": True, } - exclude_list = (".spack", "man") + # Guard against filesystem footguns of hardlinks and symlinks by using + # a visitor to retrieve a list of files and symlinks, so we don't have + # to worry about hardlinks of symlinked dirs and what not. + visitor = BuildManifestVisitor() + root = spec.prefix + visit_directory_tree(root, visitor) - # Do this at during tarball creation to save time when tarball unpacked. - # Used by make_package_relative to determine binaries to change. - for root, dirs, files in os.walk(spec.prefix, topdown=True): - dirs[:] = [d for d in dirs if d not in exclude_list] + # Symlinks. - # Directories may need to be relocated too. - for directory in dirs: - dir_path_name = os.path.join(root, directory) - rel_path_name = os.path.relpath(dir_path_name, spec.prefix) - if os.path.islink(dir_path_name): - link = os.readlink(dir_path_name) - if os.path.isabs(link) and link.startswith(spack.store.layout.root): - data["link_to_relocate"].append(rel_path_name) + # Obvious bugs: + # 1. relative links are not relocated. + # 2. paths are used as strings. + for rel_path in visitor.symlinks: + abs_path = os.path.join(root, rel_path) + link = os.readlink(abs_path) + if os.path.isabs(link) and link.startswith(spack.store.layout.root): + data["link_to_relocate"].append(rel_path) - for filename in files: - path_name = os.path.join(root, filename) - m_type, m_subtype = fsys.mime_type(path_name) - rel_path_name = os.path.relpath(path_name, spec.prefix) - added = False + # Non-symlinks. + for rel_path in visitor.files: + abs_path = os.path.join(root, rel_path) + m_type, m_subtype = fsys.mime_type(abs_path) - if os.path.islink(path_name): - link = os.readlink(path_name) - if os.path.isabs(link): - # Relocate absolute links into the spack tree - if link.startswith(spack.store.layout.root): - data["link_to_relocate"].append(rel_path_name) - added = True + if relocate.needs_binary_relocation(m_type, m_subtype): + # Why is this branch not part of needs_binary_relocation? :( + if ( + ( + m_subtype in ("x-executable", "x-sharedlib", "x-pie-executable") + and sys.platform != "darwin" + ) + or (m_subtype in ("x-mach-binary") and sys.platform == "darwin") + or (not rel_path.endswith(".o")) + ): + data["binary_to_relocate"].append(rel_path) + data["binary_to_relocate_fullpath"].append(abs_path) + continue - if relocate.needs_binary_relocation(m_type, m_subtype): - if ( - ( - m_subtype in ("x-executable", "x-sharedlib", "x-pie-executable") - and sys.platform != "darwin" - ) - or (m_subtype in ("x-mach-binary") and sys.platform == "darwin") - or (not filename.endswith(".o")) - ): - data["binary_to_relocate"].append(rel_path_name) - data["binary_to_relocate_fullpath"].append(path_name) - added = True + elif relocate.needs_text_relocation(m_type, m_subtype): + data["text_to_relocate"].append(rel_path) + continue - if relocate.needs_text_relocation(m_type, m_subtype): - data["text_to_relocate"].append(rel_path_name) - added = True + data["other"].append(abs_path) - if not added: - data["other"].append(path_name) return data @@ -734,6 +774,7 @@ def write_buildinfo_file(spec, workdir, rel=False): buildinfo["relocate_textfiles"] = manifest["text_to_relocate"] buildinfo["relocate_binaries"] = manifest["binary_to_relocate"] buildinfo["relocate_links"] = manifest["link_to_relocate"] + buildinfo["hardlinks_deduped"] = manifest["hardlinks_deduped"] buildinfo["prefix_to_hash"] = prefix_to_hash filename = buildinfo_file_name(workdir) with open(filename, "w") as outfile: @@ -1441,6 +1482,38 @@ def check_package_relocatable(workdir, spec, allow_root): relocate.raise_if_not_relocatable(cur_path_names, allow_root) +def dedupe_hardlinks_if_necessary(root, buildinfo): + """Updates a buildinfo dict for old archives that did + not dedupe hardlinks. De-duping hardlinks is necessary + when relocating files in parallel and in-place. This + means we must preserve inodes when relocating.""" + + # New archives don't need this. + if buildinfo.get("hardlinks_deduped", False): + return + + # Clearly we can assume that an inode is either in the + # textfile or binary group, but let's just stick to + # a single set of visited nodes. + visited = set() + + # Note: we do *not* dedupe hardlinked symlinks, since + # it seems difficult or even impossible to relink + # symlinks while preserving inode. + for key in ("relocate_textfiles", "relocate_binaries"): + if key not in buildinfo: + continue + new_list = [] + for rel_path in buildinfo[key]: + stat_result = os.lstat(os.path.join(root, rel_path)) + identifier = (stat_result.st_dev, stat_result.st_ino) + if identifier in visited: + continue + visited.add(identifier) + new_list.append(rel_path) + buildinfo[key] = new_list + + def relocate_package(spec, allow_root): """ Relocate the given package @@ -1503,6 +1576,9 @@ def relocate_package(spec, allow_root): tty.debug("Relocating package from", "%s to %s." % (old_layout_root, new_layout_root)) + # Old archives maybe have hardlinks repeated. + dedupe_hardlinks_if_necessary(workdir, buildinfo) + def is_backup_file(file): return file.endswith("~") @@ -1548,7 +1624,7 @@ def is_backup_file(file): # For all buildcaches # relocate the install prefixes in text files including dependencies - relocate.relocate_text(text_names, prefix_to_prefix_text) + relocate.unsafe_relocate_text(text_names, prefix_to_prefix_text) paths_to_relocate = [old_prefix, old_layout_root] paths_to_relocate.extend(prefix_to_hash.keys()) @@ -1564,13 +1640,13 @@ def is_backup_file(file): ) ) # relocate the install prefixes in binary files including dependencies - relocate.relocate_text_bin(files_to_relocate, prefix_to_prefix_bin) + relocate.unsafe_relocate_text_bin(files_to_relocate, prefix_to_prefix_bin) # If we are installing back to the same location # relocate the sbang location if the spack directory changed else: if old_spack_prefix != new_spack_prefix: - relocate.relocate_text(text_names, prefix_to_prefix_text) + relocate.unsafe_relocate_text(text_names, prefix_to_prefix_text) def _extract_inner_tarball(spec, filename, extract_to, unsigned, remote_checksum): diff --git a/lib/spack/spack/filesystem_view.py b/lib/spack/spack/filesystem_view.py index 2373ec5e45..db2d6c9477 100644 --- a/lib/spack/spack/filesystem_view.py +++ b/lib/spack/spack/filesystem_view.py @@ -95,11 +95,11 @@ def view_copy(src, dst, view, spec=None): prefix_to_projection[dep.prefix] = view.get_projection_for_spec(dep) if spack.relocate.is_binary(dst): - spack.relocate.relocate_text_bin(binaries=[dst], prefixes=prefix_to_projection) + spack.relocate.unsafe_relocate_text_bin(binaries=[dst], prefixes=prefix_to_projection) else: prefix_to_projection[spack.store.layout.root] = view._root prefix_to_projection[orig_sbang] = new_sbang - spack.relocate.relocate_text(files=[dst], prefixes=prefix_to_projection) + spack.relocate.unsafe_relocate_text(files=[dst], prefixes=prefix_to_projection) try: stat = os.stat(src) os.chown(dst, stat.st_uid, stat.st_gid) diff --git a/lib/spack/spack/relocate.py b/lib/spack/spack/relocate.py index 3ef332c204..3297965b6e 100644 --- a/lib/spack/spack/relocate.py +++ b/lib/spack/spack/relocate.py @@ -744,12 +744,15 @@ def relocate_links(links, orig_layout_root, orig_install_prefix, new_install_pre tty.warn(msg.format(link_target, abs_link, new_install_prefix)) -def relocate_text(files, prefixes, concurrency=32): +def unsafe_relocate_text(files, prefixes, concurrency=32): """Relocate text file from the original installation prefix to the new prefix. Relocation also affects the the path in Spack's sbang script. + Note: unsafe when files contains duplicates, such as repeated paths, + symlinks, hardlinks. + Args: files (list): Text files to be relocated prefixes (OrderedDict): String prefixes which need to be changed @@ -786,11 +789,14 @@ def relocate_text(files, prefixes, concurrency=32): tp.join() -def relocate_text_bin(binaries, prefixes, concurrency=32): +def unsafe_relocate_text_bin(binaries, prefixes, concurrency=32): """Replace null terminated path strings hard coded into binaries. The new install prefix must be shorter than the original one. + Note: unsafe when files contains duplicates, such as repeated paths, + symlinks, hardlinks. + Args: binaries (list): binaries to be relocated prefixes (OrderedDict): String prefixes which need to be changed. diff --git a/lib/spack/spack/rewiring.py b/lib/spack/spack/rewiring.py index 8a2dcad035..6b6a37ddd1 100644 --- a/lib/spack/spack/rewiring.py +++ b/lib/spack/spack/rewiring.py @@ -70,7 +70,7 @@ def rewire_node(spec, explicit): for rel_path in manifest.get("text_to_relocate", []) ] if text_to_relocate: - relocate.relocate_text(files=text_to_relocate, prefixes=prefix_to_prefix) + relocate.unsafe_relocate_text(files=text_to_relocate, prefixes=prefix_to_prefix) bins_to_relocate = [ os.path.join(tempdir, spec.dag_hash(), rel_path) @@ -97,7 +97,7 @@ def rewire_node(spec, explicit): spec.build_spec.prefix, spec.prefix, ) - relocate.relocate_text_bin(binaries=bins_to_relocate, prefixes=prefix_to_prefix) + relocate.unsafe_relocate_text_bin(binaries=bins_to_relocate, prefixes=prefix_to_prefix) # Copy package into place, except for spec.json (because spec.json # describes the old spec and not the new spliced spec). shutil.copytree( diff --git a/lib/spack/spack/test/bindist.py b/lib/spack/spack/test/bindist.py index 7e9066ccbf..d0a8a0a4f0 100644 --- a/lib/spack/spack/test/bindist.py +++ b/lib/spack/spack/test/bindist.py @@ -11,6 +11,8 @@ import py import pytest +from llnl.util.filesystem import visit_directory_tree + import spack.binary_distribution as bindist import spack.config import spack.hooks.sbang as sbang @@ -633,3 +635,47 @@ def test_FetchCacheError_pretty_printing_single(): assert "Multiple errors" not in str_e assert "RuntimeError: Oops!" in str_e assert str_e.rstrip() == str_e + + +def test_build_manifest_visitor(tmpdir): + dir = "directory" + file = os.path.join("directory", "file") + + with tmpdir.as_cwd(): + # Create a file inside a directory + os.mkdir(dir) + with open(file, "wb") as f: + f.write(b"example file") + + # Symlink the dir + os.symlink(dir, "symlink_to_directory") + + # Symlink the file + os.symlink(file, "symlink_to_file") + + # Hardlink the file + os.link(file, "hardlink_of_file") + + # Hardlinked symlinks: seems like this is only a thing on Linux, + # on Darwin the symlink *target* is hardlinked, on Linux the + # symlink *itself* is hardlinked. + if sys.platform.startswith("linux"): + os.link("symlink_to_file", "hardlink_of_symlink_to_file") + os.link("symlink_to_directory", "hardlink_of_symlink_to_directory") + + visitor = bindist.BuildManifestVisitor() + visit_directory_tree(str(tmpdir), visitor) + + # We de-dupe hardlinks of files, so there should really be just one file + assert len(visitor.files) == 1 + + # We do not de-dupe symlinks, cause it's unclear how to update symlinks + # in-place, preserving inodes. + if sys.platform.startswith("linux"): + assert len(visitor.symlinks) == 4 # includes hardlinks of symlinks. + else: + assert len(visitor.symlinks) == 2 + + with tmpdir.as_cwd(): + assert not any(os.path.islink(f) or os.path.isdir(f) for f in visitor.files) + assert all(os.path.islink(f) for f in visitor.symlinks) diff --git a/lib/spack/spack/test/packaging.py b/lib/spack/spack/test/packaging.py index f0d1c240e8..0ed4f943a3 100644 --- a/lib/spack/spack/test/packaging.py +++ b/lib/spack/spack/test/packaging.py @@ -36,7 +36,7 @@ needs_binary_relocation, needs_text_relocation, relocate_links, - relocate_text, + unsafe_relocate_text, ) from spack.spec import Spec @@ -190,7 +190,7 @@ def test_buildcache(mock_archive, tmpdir): @pytest.mark.usefixtures("install_mockery") -def test_relocate_text(tmpdir): +def test_unsafe_relocate_text(tmpdir): spec = Spec("trivial-install-test-package") spec.concretize() with tmpdir.as_cwd(): @@ -203,7 +203,7 @@ def test_relocate_text(tmpdir): filenames = [filename] new_dir = "/opt/rh/devtoolset/" # Singleton dict doesn't matter if Ordered - relocate_text(filenames, {old_dir: new_dir}) + unsafe_relocate_text(filenames, {old_dir: new_dir}) with open(filename, "r") as script: for line in script: assert new_dir in line diff --git a/lib/spack/spack/test/relocate.py b/lib/spack/spack/test/relocate.py index b79f0ba1a4..ab306759cd 100644 --- a/lib/spack/spack/test/relocate.py +++ b/lib/spack/spack/test/relocate.py @@ -406,7 +406,7 @@ def test_relocate_text_bin(hello_world, copy_binary, tmpdir): orig_path_bytes = str(orig_binary.dirpath()).encode("utf-8") new_path_bytes = str(new_binary.dirpath()).encode("utf-8") - spack.relocate.relocate_text_bin([str(new_binary)], {orig_path_bytes: new_path_bytes}) + spack.relocate.unsafe_relocate_text_bin([str(new_binary)], {orig_path_bytes: new_path_bytes}) # Check original directory is not there anymore and it was # substituted with the new one @@ -421,7 +421,7 @@ def test_relocate_text_bin_raise_if_new_prefix_is_longer(tmpdir): with open(fpath, "w") as f: f.write("/short") with pytest.raises(spack.relocate.BinaryTextReplaceError): - spack.relocate.relocate_text_bin([fpath], {short_prefix: long_prefix}) + spack.relocate.unsafe_relocate_text_bin([fpath], {short_prefix: long_prefix}) @pytest.mark.requires_executables("install_name_tool", "file", "cc")