binary_distribution.py: support build cache layout 2 (#41773)

Add forward compatibility for tarballs created by Spack 0.22, which
use build cache layout version 2.

Spack 0.21 continues to produce build cache layout version 1 tarballs.

Build cache layout version 2 also lists parent directories of the
package prefix in the tarball, which is required for certain container
runtimes.
This commit is contained in:
Harmen Stoppels 2024-01-10 13:21:15 +01:00
parent 0295b466d7
commit 19e86088cd
2 changed files with 77 additions and 22 deletions

View file

@ -69,6 +69,7 @@
BUILD_CACHE_RELATIVE_PATH = "build_cache"
BUILD_CACHE_KEYS_RELATIVE_PATH = "_pgp"
CURRENT_BUILD_CACHE_LAYOUT_VERSION = 1
FORWARD_COMPAT_BUILD_CACHE_LAYOUT_VERSION = 2
class BuildCacheDatabase(spack_db.Database):
@ -1696,7 +1697,7 @@ def download_tarball(spec, unsigned=False, mirrors_for_spec=None):
try:
_get_valid_spec_file(
local_specfile_stage.save_filename,
CURRENT_BUILD_CACHE_LAYOUT_VERSION,
FORWARD_COMPAT_BUILD_CACHE_LAYOUT_VERSION,
)
except InvalidMetadataFile as e:
tty.warn(
@ -1737,7 +1738,7 @@ def download_tarball(spec, unsigned=False, mirrors_for_spec=None):
try:
_get_valid_spec_file(
local_specfile_path, CURRENT_BUILD_CACHE_LAYOUT_VERSION
local_specfile_path, FORWARD_COMPAT_BUILD_CACHE_LAYOUT_VERSION
)
except InvalidMetadataFile as e:
tty.warn(
@ -2026,11 +2027,12 @@ def _extract_inner_tarball(spec, filename, extract_to, unsigned, remote_checksum
def _tar_strip_component(tar: tarfile.TarFile, prefix: str):
"""Strip the top-level directory `prefix` from the member names in a tarfile."""
"""Yield all members of tarfile that start with given prefix, and strip that prefix (including
symlinks)"""
# Including trailing /, otherwise we end up with absolute paths.
regex = re.compile(re.escape(prefix) + "/*")
# Remove the top-level directory from the member (link)names.
# Only yield members in the package prefix.
# Note: when a tarfile is created, relative in-prefix symlinks are
# expanded to matching member names of tarfile entries. So, we have
# to ensure that those are updated too.
@ -2038,12 +2040,14 @@ def _tar_strip_component(tar: tarfile.TarFile, prefix: str):
# them.
for m in tar.getmembers():
result = regex.match(m.name)
assert result is not None
if not result:
continue
m.name = m.name[result.end() :]
if m.linkname:
result = regex.match(m.linkname)
if result:
m.linkname = m.linkname[result.end() :]
yield m
def extract_tarball(spec, download_result, unsigned=False, force=False, timer=timer.NULL_TIMER):
@ -2067,7 +2071,7 @@ def extract_tarball(spec, download_result, unsigned=False, force=False, timer=ti
specfile_path = download_result["specfile_stage"].save_filename
spec_dict, layout_version = _get_valid_spec_file(
specfile_path, CURRENT_BUILD_CACHE_LAYOUT_VERSION
specfile_path, FORWARD_COMPAT_BUILD_CACHE_LAYOUT_VERSION
)
bchecksum = spec_dict["binary_cache_checksum"]
@ -2086,7 +2090,7 @@ def extract_tarball(spec, download_result, unsigned=False, force=False, timer=ti
_delete_staged_downloads(download_result)
shutil.rmtree(tmpdir)
raise e
elif layout_version == 1:
elif 1 <= layout_version <= 2:
# Newer buildcache layout: the .spack file contains just
# in the install tree, the signature, if it exists, is
# wrapped around the spec.json at the root. If sig verify
@ -2113,8 +2117,10 @@ def extract_tarball(spec, download_result, unsigned=False, force=False, timer=ti
try:
with closing(tarfile.open(tarfile_path, "r")) as tar:
# Remove install prefix from tarfil to extract directly into spec.prefix
_tar_strip_component(tar, prefix=_ensure_common_prefix(tar))
tar.extractall(path=spec.prefix)
tar.extractall(
path=spec.prefix,
members=_tar_strip_component(tar, prefix=_ensure_common_prefix(tar)),
)
except Exception:
shutil.rmtree(spec.prefix, ignore_errors=True)
_delete_staged_downloads(download_result)
@ -2149,20 +2155,47 @@ def extract_tarball(spec, download_result, unsigned=False, force=False, timer=ti
def _ensure_common_prefix(tar: tarfile.TarFile) -> str:
# Get the shortest length directory.
common_prefix = min((e.name for e in tar.getmembers() if e.isdir()), key=len, default=None)
if common_prefix is None:
raise ValueError("Tarball does not contain a common prefix")
# Validate that each file starts with the prefix
for member in tar.getmembers():
if not member.name.startswith(common_prefix):
raise ValueError(
f"Tarball contains file {member.name} outside of prefix {common_prefix}"
# Find the lowest `binary_distribution` file (hard-coded forward slash is on purpose).
binary_distribution = min(
(
e.name
for e in tar.getmembers()
if e.isfile() and e.name.endswith(".spack/binary_distribution")
),
key=len,
default=None,
)
return common_prefix
if binary_distribution is None:
raise ValueError("Tarball is not a Spack package, missing binary_distribution file")
pkg_path = pathlib.PurePosixPath(binary_distribution).parent.parent
# Even the most ancient Spack version has required to list the dir of the package itself, so
# guard against broken tarballs where `path.parent.parent` is empty.
if pkg_path == pathlib.PurePosixPath():
raise ValueError("Invalid tarball, missing package prefix dir")
pkg_prefix = str(pkg_path)
# Ensure all tar entries are in the pkg_prefix dir, and if they're not, they should be parent
# dirs of it.
has_prefix = False
for member in tar.getmembers():
stripped = member.name.rstrip("/")
if not (
stripped.startswith(pkg_prefix) or member.isdir() and pkg_prefix.startswith(stripped)
):
raise ValueError(f"Tarball contains file {stripped} outside of prefix {pkg_prefix}")
if member.isdir() and stripped == pkg_prefix:
has_prefix = True
# This is technically not required, but let's be defensive about the existence of the package
# prefix dir.
if not has_prefix:
raise ValueError(f"Tarball does not contain a common prefix {pkg_prefix}")
return pkg_prefix
def install_root_node(spec, unsigned=False, force=False, sha256=None):

View file

@ -201,6 +201,9 @@ def dummy_prefix(tmpdir):
with open(data, "w") as f:
f.write("hello world")
with open(p.join(".spack", "binary_distribution"), "w") as f:
f.write("{}")
os.symlink("app", relative_app_link)
os.symlink(app, absolute_app_link)
@ -1024,7 +1027,9 @@ def test_tarball_common_prefix(dummy_prefix, tmpdir):
bindist._tar_strip_component(tar, common_prefix)
# Extract into prefix2
tar.extractall(path="prefix2")
tar.extractall(
path="prefix2", members=bindist._tar_strip_component(tar, common_prefix)
)
# Verify files are all there at the correct level.
assert set(os.listdir("prefix2")) == {"bin", "share", ".spack"}
@ -1044,13 +1049,30 @@ def test_tarball_common_prefix(dummy_prefix, tmpdir):
)
def test_tarfile_missing_binary_distribution_file(tmpdir):
"""A tarfile that does not contain a .spack/binary_distribution file cannot be
used to install."""
with tmpdir.as_cwd():
# An empty .spack dir.
with tarfile.open("empty.tar", mode="w") as tar:
tarinfo = tarfile.TarInfo(name="example/.spack")
tarinfo.type = tarfile.DIRTYPE
tar.addfile(tarinfo)
with pytest.raises(ValueError, match="missing binary_distribution file"):
bindist._ensure_common_prefix(tarfile.open("empty.tar", mode="r"))
def test_tarfile_without_common_directory_prefix_fails(tmpdir):
"""A tarfile that only contains files without a common package directory
should fail to extract, as we won't know where to put the files."""
with tmpdir.as_cwd():
# Create a broken tarball with just a file, no directories.
with tarfile.open("empty.tar", mode="w") as tar:
tar.addfile(tarfile.TarInfo(name="example/file"), fileobj=io.BytesIO(b"hello"))
tar.addfile(
tarfile.TarInfo(name="example/.spack/binary_distribution"),
fileobj=io.BytesIO(b"hello"),
)
with pytest.raises(ValueError, match="Tarball does not contain a common prefix"):
bindist._ensure_common_prefix(tarfile.open("empty.tar", mode="r"))
@ -1166,7 +1188,7 @@ def test_get_valid_spec_file_no_json(tmp_path, filename):
def test_download_tarball_with_unsupported_layout_fails(tmp_path, mutable_config, capsys):
layout_version = bindist.CURRENT_BUILD_CACHE_LAYOUT_VERSION + 1
layout_version = bindist.FORWARD_COMPAT_BUILD_CACHE_LAYOUT_VERSION + 1
spec = Spec("gmake@4.4.1%gcc@13.1.0 arch=linux-ubuntu23.04-zen2")
spec._mark_concrete()
spec_dict = spec.to_dict()