Relocation should take hardlinks into account (#33460)

Currently `relocate_text` and `relocate_text_bin` are unsafe in the
sense that they run in parallel, and lead to races when modifying
different items pointing to the same inode.

This leads to the issue observed in #33453.

This PR:

1. Renames those functions to `unsafe_*` so people are aware
2. Adds logic to deal with hardlinks in current binary packages
3. Adds logic to deal with hardlinks when creating new binary tarballs,
   so the install side doesn't have to de-dupe hardlinks.
4. Adds a test for 3

The assumption is that all our relocation logic preserves inodes. That
is, we should never copy a file, modify it, and then move it back. I
quickly verified, and its seems like this is true for (binary) text
relocation, as well as rpath patching in patchelf (even when the file
grows) and mach-o fixes.
This commit is contained in:
Harmen Stoppels 2022-10-21 18:30:26 +02:00 committed by GitHub
parent 7c3d93465c
commit f9112a6244
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 185 additions and 57 deletions

View file

@ -23,7 +23,7 @@
import llnl.util.filesystem as fsys
import llnl.util.lang
import llnl.util.tty as tty
from llnl.util.filesystem import mkdirp
from llnl.util.filesystem import BaseDirectoryVisitor, mkdirp, visit_directory_tree
import spack.cmd
import spack.config as config
@ -642,6 +642,51 @@ def read_buildinfo_file(prefix):
return buildinfo
class BuildManifestVisitor(BaseDirectoryVisitor):
"""Visitor that collects a list of files and symlinks
that can be checked for need of relocation. It knows how
to dedupe hardlinks and deal with symlinks to files and
directories."""
def __init__(self):
# Save unique identifiers of files to avoid
# relocating hardlink files for each path.
self.visited = set()
# Lists of files we will check
self.files = []
self.symlinks = []
def seen_before(self, root, rel_path):
stat_result = os.lstat(os.path.join(root, rel_path))
identifier = (stat_result.st_dev, stat_result.st_ino)
if identifier in self.visited:
return True
else:
self.visited.add(identifier)
return False
def visit_file(self, root, rel_path, depth):
if self.seen_before(root, rel_path):
return
self.files.append(rel_path)
def visit_symlinked_file(self, root, rel_path, depth):
# Note: symlinks *can* be hardlinked, but it is unclear if
# symlinks can be relinked in-place (preserving inode).
# Therefore, we do *not* de-dupe hardlinked symlinks.
self.symlinks.append(rel_path)
def before_visit_dir(self, root, rel_path, depth):
return os.path.basename(rel_path) not in (".spack", "man")
def before_visit_symlinked_dir(self, root, rel_path, depth):
# Treat symlinked directories simply as symlinks.
self.visit_symlinked_file(root, rel_path, depth)
# Never recurse into symlinked directories.
return False
def get_buildfile_manifest(spec):
"""
Return a data structure with information about a build, including
@ -657,57 +702,52 @@ def get_buildfile_manifest(spec):
"link_to_relocate": [],
"other": [],
"binary_to_relocate_fullpath": [],
"hardlinks_deduped": True,
}
exclude_list = (".spack", "man")
# Guard against filesystem footguns of hardlinks and symlinks by using
# a visitor to retrieve a list of files and symlinks, so we don't have
# to worry about hardlinks of symlinked dirs and what not.
visitor = BuildManifestVisitor()
root = spec.prefix
visit_directory_tree(root, visitor)
# Do this at during tarball creation to save time when tarball unpacked.
# Used by make_package_relative to determine binaries to change.
for root, dirs, files in os.walk(spec.prefix, topdown=True):
dirs[:] = [d for d in dirs if d not in exclude_list]
# Symlinks.
# Directories may need to be relocated too.
for directory in dirs:
dir_path_name = os.path.join(root, directory)
rel_path_name = os.path.relpath(dir_path_name, spec.prefix)
if os.path.islink(dir_path_name):
link = os.readlink(dir_path_name)
if os.path.isabs(link) and link.startswith(spack.store.layout.root):
data["link_to_relocate"].append(rel_path_name)
# Obvious bugs:
# 1. relative links are not relocated.
# 2. paths are used as strings.
for rel_path in visitor.symlinks:
abs_path = os.path.join(root, rel_path)
link = os.readlink(abs_path)
if os.path.isabs(link) and link.startswith(spack.store.layout.root):
data["link_to_relocate"].append(rel_path)
for filename in files:
path_name = os.path.join(root, filename)
m_type, m_subtype = fsys.mime_type(path_name)
rel_path_name = os.path.relpath(path_name, spec.prefix)
added = False
# Non-symlinks.
for rel_path in visitor.files:
abs_path = os.path.join(root, rel_path)
m_type, m_subtype = fsys.mime_type(abs_path)
if os.path.islink(path_name):
link = os.readlink(path_name)
if os.path.isabs(link):
# Relocate absolute links into the spack tree
if link.startswith(spack.store.layout.root):
data["link_to_relocate"].append(rel_path_name)
added = True
if relocate.needs_binary_relocation(m_type, m_subtype):
# Why is this branch not part of needs_binary_relocation? :(
if (
(
m_subtype in ("x-executable", "x-sharedlib", "x-pie-executable")
and sys.platform != "darwin"
)
or (m_subtype in ("x-mach-binary") and sys.platform == "darwin")
or (not rel_path.endswith(".o"))
):
data["binary_to_relocate"].append(rel_path)
data["binary_to_relocate_fullpath"].append(abs_path)
continue
if relocate.needs_binary_relocation(m_type, m_subtype):
if (
(
m_subtype in ("x-executable", "x-sharedlib", "x-pie-executable")
and sys.platform != "darwin"
)
or (m_subtype in ("x-mach-binary") and sys.platform == "darwin")
or (not filename.endswith(".o"))
):
data["binary_to_relocate"].append(rel_path_name)
data["binary_to_relocate_fullpath"].append(path_name)
added = True
elif relocate.needs_text_relocation(m_type, m_subtype):
data["text_to_relocate"].append(rel_path)
continue
if relocate.needs_text_relocation(m_type, m_subtype):
data["text_to_relocate"].append(rel_path_name)
added = True
data["other"].append(abs_path)
if not added:
data["other"].append(path_name)
return data
@ -734,6 +774,7 @@ def write_buildinfo_file(spec, workdir, rel=False):
buildinfo["relocate_textfiles"] = manifest["text_to_relocate"]
buildinfo["relocate_binaries"] = manifest["binary_to_relocate"]
buildinfo["relocate_links"] = manifest["link_to_relocate"]
buildinfo["hardlinks_deduped"] = manifest["hardlinks_deduped"]
buildinfo["prefix_to_hash"] = prefix_to_hash
filename = buildinfo_file_name(workdir)
with open(filename, "w") as outfile:
@ -1441,6 +1482,38 @@ def check_package_relocatable(workdir, spec, allow_root):
relocate.raise_if_not_relocatable(cur_path_names, allow_root)
def dedupe_hardlinks_if_necessary(root, buildinfo):
"""Updates a buildinfo dict for old archives that did
not dedupe hardlinks. De-duping hardlinks is necessary
when relocating files in parallel and in-place. This
means we must preserve inodes when relocating."""
# New archives don't need this.
if buildinfo.get("hardlinks_deduped", False):
return
# Clearly we can assume that an inode is either in the
# textfile or binary group, but let's just stick to
# a single set of visited nodes.
visited = set()
# Note: we do *not* dedupe hardlinked symlinks, since
# it seems difficult or even impossible to relink
# symlinks while preserving inode.
for key in ("relocate_textfiles", "relocate_binaries"):
if key not in buildinfo:
continue
new_list = []
for rel_path in buildinfo[key]:
stat_result = os.lstat(os.path.join(root, rel_path))
identifier = (stat_result.st_dev, stat_result.st_ino)
if identifier in visited:
continue
visited.add(identifier)
new_list.append(rel_path)
buildinfo[key] = new_list
def relocate_package(spec, allow_root):
"""
Relocate the given package
@ -1503,6 +1576,9 @@ def relocate_package(spec, allow_root):
tty.debug("Relocating package from", "%s to %s." % (old_layout_root, new_layout_root))
# Old archives maybe have hardlinks repeated.
dedupe_hardlinks_if_necessary(workdir, buildinfo)
def is_backup_file(file):
return file.endswith("~")
@ -1548,7 +1624,7 @@ def is_backup_file(file):
# For all buildcaches
# relocate the install prefixes in text files including dependencies
relocate.relocate_text(text_names, prefix_to_prefix_text)
relocate.unsafe_relocate_text(text_names, prefix_to_prefix_text)
paths_to_relocate = [old_prefix, old_layout_root]
paths_to_relocate.extend(prefix_to_hash.keys())
@ -1564,13 +1640,13 @@ def is_backup_file(file):
)
)
# relocate the install prefixes in binary files including dependencies
relocate.relocate_text_bin(files_to_relocate, prefix_to_prefix_bin)
relocate.unsafe_relocate_text_bin(files_to_relocate, prefix_to_prefix_bin)
# If we are installing back to the same location
# relocate the sbang location if the spack directory changed
else:
if old_spack_prefix != new_spack_prefix:
relocate.relocate_text(text_names, prefix_to_prefix_text)
relocate.unsafe_relocate_text(text_names, prefix_to_prefix_text)
def _extract_inner_tarball(spec, filename, extract_to, unsigned, remote_checksum):

View file

@ -95,11 +95,11 @@ def view_copy(src, dst, view, spec=None):
prefix_to_projection[dep.prefix] = view.get_projection_for_spec(dep)
if spack.relocate.is_binary(dst):
spack.relocate.relocate_text_bin(binaries=[dst], prefixes=prefix_to_projection)
spack.relocate.unsafe_relocate_text_bin(binaries=[dst], prefixes=prefix_to_projection)
else:
prefix_to_projection[spack.store.layout.root] = view._root
prefix_to_projection[orig_sbang] = new_sbang
spack.relocate.relocate_text(files=[dst], prefixes=prefix_to_projection)
spack.relocate.unsafe_relocate_text(files=[dst], prefixes=prefix_to_projection)
try:
stat = os.stat(src)
os.chown(dst, stat.st_uid, stat.st_gid)

View file

@ -744,12 +744,15 @@ def relocate_links(links, orig_layout_root, orig_install_prefix, new_install_pre
tty.warn(msg.format(link_target, abs_link, new_install_prefix))
def relocate_text(files, prefixes, concurrency=32):
def unsafe_relocate_text(files, prefixes, concurrency=32):
"""Relocate text file from the original installation prefix to the
new prefix.
Relocation also affects the the path in Spack's sbang script.
Note: unsafe when files contains duplicates, such as repeated paths,
symlinks, hardlinks.
Args:
files (list): Text files to be relocated
prefixes (OrderedDict): String prefixes which need to be changed
@ -786,11 +789,14 @@ def relocate_text(files, prefixes, concurrency=32):
tp.join()
def relocate_text_bin(binaries, prefixes, concurrency=32):
def unsafe_relocate_text_bin(binaries, prefixes, concurrency=32):
"""Replace null terminated path strings hard coded into binaries.
The new install prefix must be shorter than the original one.
Note: unsafe when files contains duplicates, such as repeated paths,
symlinks, hardlinks.
Args:
binaries (list): binaries to be relocated
prefixes (OrderedDict): String prefixes which need to be changed.

View file

@ -70,7 +70,7 @@ def rewire_node(spec, explicit):
for rel_path in manifest.get("text_to_relocate", [])
]
if text_to_relocate:
relocate.relocate_text(files=text_to_relocate, prefixes=prefix_to_prefix)
relocate.unsafe_relocate_text(files=text_to_relocate, prefixes=prefix_to_prefix)
bins_to_relocate = [
os.path.join(tempdir, spec.dag_hash(), rel_path)
@ -97,7 +97,7 @@ def rewire_node(spec, explicit):
spec.build_spec.prefix,
spec.prefix,
)
relocate.relocate_text_bin(binaries=bins_to_relocate, prefixes=prefix_to_prefix)
relocate.unsafe_relocate_text_bin(binaries=bins_to_relocate, prefixes=prefix_to_prefix)
# Copy package into place, except for spec.json (because spec.json
# describes the old spec and not the new spliced spec).
shutil.copytree(

View file

@ -11,6 +11,8 @@
import py
import pytest
from llnl.util.filesystem import visit_directory_tree
import spack.binary_distribution as bindist
import spack.config
import spack.hooks.sbang as sbang
@ -633,3 +635,47 @@ def test_FetchCacheError_pretty_printing_single():
assert "Multiple errors" not in str_e
assert "RuntimeError: Oops!" in str_e
assert str_e.rstrip() == str_e
def test_build_manifest_visitor(tmpdir):
dir = "directory"
file = os.path.join("directory", "file")
with tmpdir.as_cwd():
# Create a file inside a directory
os.mkdir(dir)
with open(file, "wb") as f:
f.write(b"example file")
# Symlink the dir
os.symlink(dir, "symlink_to_directory")
# Symlink the file
os.symlink(file, "symlink_to_file")
# Hardlink the file
os.link(file, "hardlink_of_file")
# Hardlinked symlinks: seems like this is only a thing on Linux,
# on Darwin the symlink *target* is hardlinked, on Linux the
# symlink *itself* is hardlinked.
if sys.platform.startswith("linux"):
os.link("symlink_to_file", "hardlink_of_symlink_to_file")
os.link("symlink_to_directory", "hardlink_of_symlink_to_directory")
visitor = bindist.BuildManifestVisitor()
visit_directory_tree(str(tmpdir), visitor)
# We de-dupe hardlinks of files, so there should really be just one file
assert len(visitor.files) == 1
# We do not de-dupe symlinks, cause it's unclear how to update symlinks
# in-place, preserving inodes.
if sys.platform.startswith("linux"):
assert len(visitor.symlinks) == 4 # includes hardlinks of symlinks.
else:
assert len(visitor.symlinks) == 2
with tmpdir.as_cwd():
assert not any(os.path.islink(f) or os.path.isdir(f) for f in visitor.files)
assert all(os.path.islink(f) for f in visitor.symlinks)

View file

@ -36,7 +36,7 @@
needs_binary_relocation,
needs_text_relocation,
relocate_links,
relocate_text,
unsafe_relocate_text,
)
from spack.spec import Spec
@ -190,7 +190,7 @@ def test_buildcache(mock_archive, tmpdir):
@pytest.mark.usefixtures("install_mockery")
def test_relocate_text(tmpdir):
def test_unsafe_relocate_text(tmpdir):
spec = Spec("trivial-install-test-package")
spec.concretize()
with tmpdir.as_cwd():
@ -203,7 +203,7 @@ def test_relocate_text(tmpdir):
filenames = [filename]
new_dir = "/opt/rh/devtoolset/"
# Singleton dict doesn't matter if Ordered
relocate_text(filenames, {old_dir: new_dir})
unsafe_relocate_text(filenames, {old_dir: new_dir})
with open(filename, "r") as script:
for line in script:
assert new_dir in line

View file

@ -406,7 +406,7 @@ def test_relocate_text_bin(hello_world, copy_binary, tmpdir):
orig_path_bytes = str(orig_binary.dirpath()).encode("utf-8")
new_path_bytes = str(new_binary.dirpath()).encode("utf-8")
spack.relocate.relocate_text_bin([str(new_binary)], {orig_path_bytes: new_path_bytes})
spack.relocate.unsafe_relocate_text_bin([str(new_binary)], {orig_path_bytes: new_path_bytes})
# Check original directory is not there anymore and it was
# substituted with the new one
@ -421,7 +421,7 @@ def test_relocate_text_bin_raise_if_new_prefix_is_longer(tmpdir):
with open(fpath, "w") as f:
f.write("/short")
with pytest.raises(spack.relocate.BinaryTextReplaceError):
spack.relocate.relocate_text_bin([fpath], {short_prefix: long_prefix})
spack.relocate.unsafe_relocate_text_bin([fpath], {short_prefix: long_prefix})
@pytest.mark.requires_executables("install_name_tool", "file", "cc")