buildcache: skip unrecognized metadata files (#40941)
This commit improves forward compatibility of Spack with newer build cache metadata formats. Before this commit, invalid or unrecognized metadata was a fatal error; now it just causes the affected mirror to be skipped. Co-authored-by: Harmen Stoppels <me@harmenstoppels.nl>
This commit is contained in:
parent
1baf712b87
commit
67f20c3e5c
2 changed files with 163 additions and 31 deletions
|
@ -66,8 +66,9 @@
|
|||
from spack.stage import Stage
|
||||
from spack.util.executable import which
|
||||
|
||||
_build_cache_relative_path = "build_cache"
|
||||
_build_cache_keys_relative_path = "_pgp"
|
||||
BUILD_CACHE_RELATIVE_PATH = "build_cache"
|
||||
BUILD_CACHE_KEYS_RELATIVE_PATH = "_pgp"
|
||||
CURRENT_BUILD_CACHE_LAYOUT_VERSION = 1
|
||||
|
||||
|
||||
class BuildCacheDatabase(spack_db.Database):
|
||||
|
@ -481,7 +482,7 @@ def _fetch_and_cache_index(self, mirror_url, cache_entry={}):
|
|||
scheme = urllib.parse.urlparse(mirror_url).scheme
|
||||
|
||||
if scheme != "oci" and not web_util.url_exists(
|
||||
url_util.join(mirror_url, _build_cache_relative_path, "index.json")
|
||||
url_util.join(mirror_url, BUILD_CACHE_RELATIVE_PATH, "index.json")
|
||||
):
|
||||
return False
|
||||
|
||||
|
@ -600,6 +601,10 @@ def __init__(self, msg):
|
|||
super().__init__(msg)
|
||||
|
||||
|
||||
class InvalidMetadataFile(spack.error.SpackError):
    """Raised when a build cache spec metadata file is unusable: missing,
    gzip-compressed, unparseable as JSON/clearsigned JSON, or declaring a
    buildcache layout version newer than this Spack supports."""

    pass
|
||||
|
||||
|
||||
class UnsignedPackageException(spack.error.SpackError):
|
||||
"""
|
||||
Raised if installation of unsigned package is attempted without
|
||||
|
@ -614,11 +619,11 @@ def compute_hash(data):
|
|||
|
||||
|
||||
def build_cache_relative_path():
|
||||
return _build_cache_relative_path
|
||||
return BUILD_CACHE_RELATIVE_PATH
|
||||
|
||||
|
||||
def build_cache_keys_relative_path():
|
||||
return _build_cache_keys_relative_path
|
||||
return BUILD_CACHE_KEYS_RELATIVE_PATH
|
||||
|
||||
|
||||
def build_cache_prefix(prefix):
|
||||
|
@ -1401,7 +1406,7 @@ def _build_tarball_in_stage_dir(spec: Spec, out_url: str, stage_dir: str, option
|
|||
spec_dict = sjson.load(content)
|
||||
else:
|
||||
raise ValueError("{0} not a valid spec file type".format(spec_file))
|
||||
spec_dict["buildcache_layout_version"] = 1
|
||||
spec_dict["buildcache_layout_version"] = CURRENT_BUILD_CACHE_LAYOUT_VERSION
|
||||
spec_dict["binary_cache_checksum"] = {"hash_algorithm": "sha256", "hash": checksum}
|
||||
|
||||
with open(specfile_path, "w") as outfile:
|
||||
|
@ -1560,6 +1565,42 @@ def _delete_staged_downloads(download_result):
|
|||
download_result["specfile_stage"].destroy()
|
||||
|
||||
|
||||
def _get_valid_spec_file(path: str, max_supported_layout: int) -> Tuple[Dict, int]:
|
||||
"""Read and validate a spec file, returning the spec dict with its layout version, or raising
|
||||
InvalidMetadataFile if invalid."""
|
||||
try:
|
||||
with open(path, "rb") as f:
|
||||
binary_content = f.read()
|
||||
except OSError:
|
||||
raise InvalidMetadataFile(f"No such file: {path}")
|
||||
|
||||
# In the future we may support transparently decompressing compressed spec files.
|
||||
if binary_content[:2] == b"\x1f\x8b":
|
||||
raise InvalidMetadataFile("Compressed spec files are not supported")
|
||||
|
||||
try:
|
||||
as_string = binary_content.decode("utf-8")
|
||||
if path.endswith(".json.sig"):
|
||||
spec_dict = Spec.extract_json_from_clearsig(as_string)
|
||||
else:
|
||||
spec_dict = json.loads(as_string)
|
||||
except Exception as e:
|
||||
raise InvalidMetadataFile(f"Could not parse {path} due to: {e}") from e
|
||||
|
||||
# Ensure this version is not too new.
|
||||
try:
|
||||
layout_version = int(spec_dict.get("buildcache_layout_version", 0))
|
||||
except ValueError as e:
|
||||
raise InvalidMetadataFile("Could not parse layout version") from e
|
||||
|
||||
if layout_version > max_supported_layout:
|
||||
raise InvalidMetadataFile(
|
||||
f"Layout version {layout_version} is too new for this version of Spack"
|
||||
)
|
||||
|
||||
return spec_dict, layout_version
|
||||
|
||||
|
||||
def download_tarball(spec, unsigned=False, mirrors_for_spec=None):
|
||||
"""
|
||||
Download binary tarball for given package into stage area, returning
|
||||
|
@ -1652,6 +1693,18 @@ def download_tarball(spec, unsigned=False, mirrors_for_spec=None):
|
|||
try:
|
||||
local_specfile_stage.fetch()
|
||||
local_specfile_stage.check()
|
||||
try:
|
||||
_get_valid_spec_file(
|
||||
local_specfile_stage.save_filename,
|
||||
CURRENT_BUILD_CACHE_LAYOUT_VERSION,
|
||||
)
|
||||
except InvalidMetadataFile as e:
|
||||
tty.warn(
|
||||
f"Ignoring binary package for {spec.name}/{spec.dag_hash()[:7]} "
|
||||
f"from {mirror} due to invalid metadata file: {e}"
|
||||
)
|
||||
local_specfile_stage.destroy()
|
||||
continue
|
||||
except Exception:
|
||||
continue
|
||||
local_specfile_stage.cache_local()
|
||||
|
@ -1674,14 +1727,26 @@ def download_tarball(spec, unsigned=False, mirrors_for_spec=None):
|
|||
|
||||
else:
|
||||
ext = "json.sig" if try_signed else "json"
|
||||
specfile_path = url_util.join(mirror, _build_cache_relative_path, specfile_prefix)
|
||||
specfile_path = url_util.join(mirror, BUILD_CACHE_RELATIVE_PATH, specfile_prefix)
|
||||
specfile_url = f"{specfile_path}.{ext}"
|
||||
spackfile_url = url_util.join(mirror, _build_cache_relative_path, tarball)
|
||||
spackfile_url = url_util.join(mirror, BUILD_CACHE_RELATIVE_PATH, tarball)
|
||||
local_specfile_stage = try_fetch(specfile_url)
|
||||
if local_specfile_stage:
|
||||
local_specfile_path = local_specfile_stage.save_filename
|
||||
signature_verified = False
|
||||
|
||||
try:
|
||||
_get_valid_spec_file(
|
||||
local_specfile_path, CURRENT_BUILD_CACHE_LAYOUT_VERSION
|
||||
)
|
||||
except InvalidMetadataFile as e:
|
||||
tty.warn(
|
||||
f"Ignoring binary package for {spec.name}/{spec.dag_hash()[:7]} "
|
||||
f"from {mirror} due to invalid metadata file: {e}"
|
||||
)
|
||||
local_specfile_stage.destroy()
|
||||
continue
|
||||
|
||||
if try_signed and not unsigned:
|
||||
# If we found a signed specfile at the root, try to verify
|
||||
# the signature immediately. We will not download the
|
||||
|
@ -2001,24 +2066,16 @@ def extract_tarball(spec, download_result, unsigned=False, force=False, timer=ti
|
|||
)
|
||||
|
||||
specfile_path = download_result["specfile_stage"].save_filename
|
||||
|
||||
with open(specfile_path, "r") as inputfile:
|
||||
content = inputfile.read()
|
||||
if specfile_path.endswith(".json.sig"):
|
||||
spec_dict = Spec.extract_json_from_clearsig(content)
|
||||
else:
|
||||
spec_dict = sjson.load(content)
|
||||
|
||||
spec_dict, layout_version = _get_valid_spec_file(
|
||||
specfile_path, CURRENT_BUILD_CACHE_LAYOUT_VERSION
|
||||
)
|
||||
bchecksum = spec_dict["binary_cache_checksum"]
|
||||
|
||||
filename = download_result["tarball_stage"].save_filename
|
||||
signature_verified = download_result["signature_verified"]
|
||||
tmpdir = None
|
||||
|
||||
if (
|
||||
"buildcache_layout_version" not in spec_dict
|
||||
or int(spec_dict["buildcache_layout_version"]) < 1
|
||||
):
|
||||
if layout_version == 0:
|
||||
# Handle the older buildcache layout where the .spack file
|
||||
# contains a spec json, maybe an .asc file (signature),
|
||||
# and another tarball containing the actual install tree.
|
||||
|
@ -2029,7 +2086,7 @@ def extract_tarball(spec, download_result, unsigned=False, force=False, timer=ti
|
|||
_delete_staged_downloads(download_result)
|
||||
shutil.rmtree(tmpdir)
|
||||
raise e
|
||||
else:
|
||||
elif layout_version == 1:
|
||||
# Newer buildcache layout: the .spack file contains just
|
||||
# in the install tree, the signature, if it exists, is
|
||||
# wrapped around the spec.json at the root. If sig verify
|
||||
|
@ -2053,7 +2110,6 @@ def extract_tarball(spec, download_result, unsigned=False, force=False, timer=ti
|
|||
raise NoChecksumException(
|
||||
tarfile_path, size, contents, "sha256", expected, local_checksum
|
||||
)
|
||||
|
||||
try:
|
||||
with closing(tarfile.open(tarfile_path, "r")) as tar:
|
||||
# Remove install prefix from tarfil to extract directly into spec.prefix
|
||||
|
@ -2184,10 +2240,10 @@ def try_direct_fetch(spec, mirrors=None):
|
|||
|
||||
for mirror in binary_mirrors:
|
||||
buildcache_fetch_url_json = url_util.join(
|
||||
mirror.fetch_url, _build_cache_relative_path, specfile_name
|
||||
mirror.fetch_url, BUILD_CACHE_RELATIVE_PATH, specfile_name
|
||||
)
|
||||
buildcache_fetch_url_signed_json = url_util.join(
|
||||
mirror.fetch_url, _build_cache_relative_path, signed_specfile_name
|
||||
mirror.fetch_url, BUILD_CACHE_RELATIVE_PATH, signed_specfile_name
|
||||
)
|
||||
try:
|
||||
_, _, fs = web_util.read_from_url(buildcache_fetch_url_signed_json)
|
||||
|
@ -2292,7 +2348,7 @@ def get_keys(install=False, trust=False, force=False, mirrors=None):
|
|||
for mirror in mirror_collection.values():
|
||||
fetch_url = mirror.fetch_url
|
||||
keys_url = url_util.join(
|
||||
fetch_url, _build_cache_relative_path, _build_cache_keys_relative_path
|
||||
fetch_url, BUILD_CACHE_RELATIVE_PATH, BUILD_CACHE_KEYS_RELATIVE_PATH
|
||||
)
|
||||
keys_index = url_util.join(keys_url, "index.json")
|
||||
|
||||
|
@ -2357,7 +2413,7 @@ def push_keys(*mirrors, **kwargs):
|
|||
for mirror in mirrors:
|
||||
push_url = getattr(mirror, "push_url", mirror)
|
||||
keys_url = url_util.join(
|
||||
push_url, _build_cache_relative_path, _build_cache_keys_relative_path
|
||||
push_url, BUILD_CACHE_RELATIVE_PATH, BUILD_CACHE_KEYS_RELATIVE_PATH
|
||||
)
|
||||
keys_local = url_util.local_file_path(keys_url)
|
||||
|
||||
|
@ -2495,11 +2551,11 @@ def download_buildcache_entry(file_descriptions, mirror_url=None):
|
|||
)
|
||||
|
||||
if mirror_url:
|
||||
mirror_root = os.path.join(mirror_url, _build_cache_relative_path)
|
||||
mirror_root = os.path.join(mirror_url, BUILD_CACHE_RELATIVE_PATH)
|
||||
return _download_buildcache_entry(mirror_root, file_descriptions)
|
||||
|
||||
for mirror in spack.mirror.MirrorCollection(binary=True).values():
|
||||
mirror_root = os.path.join(mirror.fetch_url, _build_cache_relative_path)
|
||||
mirror_root = os.path.join(mirror.fetch_url, BUILD_CACHE_RELATIVE_PATH)
|
||||
|
||||
if _download_buildcache_entry(mirror_root, file_descriptions):
|
||||
return True
|
||||
|
@ -2590,7 +2646,7 @@ def __init__(self, url, local_hash, urlopen=web_util.urlopen):
|
|||
|
||||
def get_remote_hash(self):
|
||||
# Failure to fetch index.json.hash is not fatal
|
||||
url_index_hash = url_util.join(self.url, _build_cache_relative_path, "index.json.hash")
|
||||
url_index_hash = url_util.join(self.url, BUILD_CACHE_RELATIVE_PATH, "index.json.hash")
|
||||
try:
|
||||
response = self.urlopen(urllib.request.Request(url_index_hash, headers=self.headers))
|
||||
except urllib.error.URLError:
|
||||
|
@ -2611,7 +2667,7 @@ def conditional_fetch(self) -> FetchIndexResult:
|
|||
return FetchIndexResult(etag=None, hash=None, data=None, fresh=True)
|
||||
|
||||
# Otherwise, download index.json
|
||||
url_index = url_util.join(self.url, _build_cache_relative_path, "index.json")
|
||||
url_index = url_util.join(self.url, BUILD_CACHE_RELATIVE_PATH, "index.json")
|
||||
|
||||
try:
|
||||
response = self.urlopen(urllib.request.Request(url_index, headers=self.headers))
|
||||
|
@ -2655,7 +2711,7 @@ def __init__(self, url, etag, urlopen=web_util.urlopen):
|
|||
|
||||
def conditional_fetch(self) -> FetchIndexResult:
|
||||
# Just do a conditional fetch immediately
|
||||
url = url_util.join(self.url, _build_cache_relative_path, "index.json")
|
||||
url = url_util.join(self.url, BUILD_CACHE_RELATIVE_PATH, "index.json")
|
||||
headers = {
|
||||
"User-Agent": web_util.SPACK_USER_AGENT,
|
||||
"If-None-Match": '"{}"'.format(self.etag),
|
||||
|
|
|
@ -4,7 +4,9 @@
|
|||
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||
import filecmp
|
||||
import glob
|
||||
import gzip
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import platform
|
||||
import sys
|
||||
|
@ -1112,3 +1114,77 @@ def test_tarfile_of_spec_prefix(tmpdir):
|
|||
assert tar.getmember(f"{expected_prefix}/b_directory/file").isreg()
|
||||
assert tar.getmember(f"{expected_prefix}/c_directory").isdir()
|
||||
assert tar.getmember(f"{expected_prefix}/c_directory/file").isreg()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("layout,expect_success", [(None, True), (1, True), (2, False)])
def test_get_valid_spec_file(tmp_path, layout, expect_success):
    """Reading a spec.json succeeds iff its layout version is supported;
    a file without a layout version is treated as version 0."""
    spec_dict = Spec("example").to_dict()
    if layout is not None:
        spec_dict["buildcache_layout_version"] = layout

    path = tmp_path / "spec.json"
    with open(path, "w") as f:
        json.dump(spec_dict, f)

    try:
        read_dict, read_layout = bindist._get_valid_spec_file(
            str(path), max_supported_layout=1
        )
    except bindist.InvalidMetadataFile:
        assert not expect_success
    else:
        assert expect_success
        assert read_dict == spec_dict
        assert read_layout == (layout or 0)
|
||||
|
||||
|
||||
def test_get_valid_spec_file_doesnt_exist(tmp_path):
    """A missing path raises InvalidMetadataFile rather than a bare OSError."""
    missing = tmp_path / "no-such-file"
    with pytest.raises(bindist.InvalidMetadataFile, match="No such file"):
        bindist._get_valid_spec_file(str(missing), max_supported_layout=1)
|
||||
|
||||
|
||||
def test_get_valid_spec_file_gzipped(tmp_path):
    """Gzip-compressed spec files are rejected with a clear error."""
    gz_path = tmp_path / "spec.json.gz"
    # The payload is irrelevant; only the gzip magic bytes matter.
    with gzip.open(gz_path, "wb") as stream:
        stream.write(b"hello")
    with pytest.raises(
        bindist.InvalidMetadataFile, match="Compressed spec files are not supported"
    ):
        bindist._get_valid_spec_file(str(gz_path), max_supported_layout=1)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("filename", ["spec.json", "spec.json.sig"])
def test_get_valid_spec_file_no_json(tmp_path, filename):
    """Unparseable contents raise InvalidMetadataFile for plain and signed specs alike."""
    target = tmp_path / filename
    target.write_text("not json")
    with pytest.raises(bindist.InvalidMetadataFile):
        bindist._get_valid_spec_file(str(target), max_supported_layout=1)
|
||||
|
||||
|
||||
def test_download_tarball_with_unsupported_layout_fails(tmp_path, mutable_config, capsys):
    """download_tarball skips, with a warning, metadata newer than we support."""
    layout_version = bindist.CURRENT_BUILD_CACHE_LAYOUT_VERSION + 1
    spec = Spec("gmake@4.4.1%gcc@13.1.0 arch=linux-ubuntu23.04-zen2")
    spec._mark_concrete()
    spec_dict = spec.to_dict()
    spec_dict["buildcache_layout_version"] = layout_version

    # Lay out a minimal local build cache containing only the spec metadata.
    specfile = (
        tmp_path / bindist.build_cache_relative_path() / bindist.tarball_name(spec, ".spec.json")
    )
    specfile.parent.mkdir(parents=True)
    with open(specfile, "w") as f:
        json.dump(spec_dict, f)

    # Register the directory as a binary mirror.
    mirror_cmd("add", "test-mirror", str(tmp_path))

    # The download must be refused...
    assert bindist.download_tarball(spec, unsigned=True) is None

    # ...and the warning must name the unsupported layout version.
    assert f"Layout version {layout_version} is too new" in capsys.readouterr().err
|
||||
|
|
Loading…
Reference in a new issue