buildcache: skip unrecognized metadata files (#40941)
This commit improves forward compatibility of Spack with newer build cache metadata formats. Before this commit, invalid or unrecognized metadata files were fatal errors; now they just cause a mirror to be skipped.

Co-authored-by: Harmen Stoppels <me@harmenstoppels.nl>
parent 1baf712b87
commit 67f20c3e5c
2 changed files with 163 additions and 31 deletions
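The change boils down to one pattern: parse and validate a mirror's spec metadata up front, and on failure warn and move on to the next mirror instead of raising a fatal error. Below is a minimal, self-contained sketch of that pattern. The names read_spec_metadata, first_usable_spec_file, and SUPPORTED_LAYOUT_VERSION are illustrative stand-ins, not Spack API; the real helper is _get_valid_spec_file and the real constant is CURRENT_BUILD_CACHE_LAYOUT_VERSION, both visible in the diff that follows.

import json

SUPPORTED_LAYOUT_VERSION = 1  # stand-in for CURRENT_BUILD_CACHE_LAYOUT_VERSION in the diff


class InvalidMetadataFile(Exception):
    """Raised when a spec metadata file cannot be used."""


def read_spec_metadata(path: str, max_supported_layout: int) -> dict:
    """Parse a spec.json file and reject layout versions newer than we understand."""
    try:
        with open(path, "rb") as f:
            raw = f.read()
    except OSError as e:
        raise InvalidMetadataFile(f"No such file: {path}") from e

    try:
        spec_dict = json.loads(raw.decode("utf-8"))
    except Exception as e:
        raise InvalidMetadataFile(f"Could not parse {path}: {e}") from e

    layout = int(spec_dict.get("buildcache_layout_version", 0))
    if layout > max_supported_layout:
        raise InvalidMetadataFile(f"Layout version {layout} is too new")
    return spec_dict


def first_usable_spec_file(specfile_paths):
    """Return the first parseable spec dict, skipping (not failing on) bad metadata."""
    for path in specfile_paths:
        try:
            return read_spec_metadata(path, SUPPORTED_LAYOUT_VERSION)
        except InvalidMetadataFile as e:
            print(f"Ignoring metadata file {path}: {e}")
    return None

The real implementation additionally handles clearsigned .json.sig metadata and rejects gzip-compressed files, as _get_valid_spec_file in the diff below shows.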
First changed file (the binary distribution module):

@@ -66,8 +66,9 @@
 from spack.stage import Stage
 from spack.util.executable import which

-_build_cache_relative_path = "build_cache"
-_build_cache_keys_relative_path = "_pgp"
+BUILD_CACHE_RELATIVE_PATH = "build_cache"
+BUILD_CACHE_KEYS_RELATIVE_PATH = "_pgp"
+CURRENT_BUILD_CACHE_LAYOUT_VERSION = 1


 class BuildCacheDatabase(spack_db.Database):
@@ -481,7 +482,7 @@ def _fetch_and_cache_index(self, mirror_url, cache_entry={}):
         scheme = urllib.parse.urlparse(mirror_url).scheme

         if scheme != "oci" and not web_util.url_exists(
-            url_util.join(mirror_url, _build_cache_relative_path, "index.json")
+            url_util.join(mirror_url, BUILD_CACHE_RELATIVE_PATH, "index.json")
         ):
             return False
@@ -600,6 +601,10 @@ def __init__(self, msg):
         super().__init__(msg)


+class InvalidMetadataFile(spack.error.SpackError):
+    pass
+
+
 class UnsignedPackageException(spack.error.SpackError):
     """
     Raised if installation of unsigned package is attempted without
@@ -614,11 +619,11 @@ def compute_hash(data):


 def build_cache_relative_path():
-    return _build_cache_relative_path
+    return BUILD_CACHE_RELATIVE_PATH


 def build_cache_keys_relative_path():
-    return _build_cache_keys_relative_path
+    return BUILD_CACHE_KEYS_RELATIVE_PATH


 def build_cache_prefix(prefix):
@@ -1401,7 +1406,7 @@ def _build_tarball_in_stage_dir(spec: Spec, out_url: str, stage_dir: str, option
             spec_dict = sjson.load(content)
         else:
             raise ValueError("{0} not a valid spec file type".format(spec_file))
-    spec_dict["buildcache_layout_version"] = 1
+    spec_dict["buildcache_layout_version"] = CURRENT_BUILD_CACHE_LAYOUT_VERSION
     spec_dict["binary_cache_checksum"] = {"hash_algorithm": "sha256", "hash": checksum}

     with open(specfile_path, "w") as outfile:
@@ -1560,6 +1565,42 @@ def _delete_staged_downloads(download_result):
     download_result["specfile_stage"].destroy()


+def _get_valid_spec_file(path: str, max_supported_layout: int) -> Tuple[Dict, int]:
+    """Read and validate a spec file, returning the spec dict with its layout version, or raising
+    InvalidMetadataFile if invalid."""
+    try:
+        with open(path, "rb") as f:
+            binary_content = f.read()
+    except OSError:
+        raise InvalidMetadataFile(f"No such file: {path}")
+
+    # In the future we may support transparently decompressing compressed spec files.
+    if binary_content[:2] == b"\x1f\x8b":
+        raise InvalidMetadataFile("Compressed spec files are not supported")
+
+    try:
+        as_string = binary_content.decode("utf-8")
+        if path.endswith(".json.sig"):
+            spec_dict = Spec.extract_json_from_clearsig(as_string)
+        else:
+            spec_dict = json.loads(as_string)
+    except Exception as e:
+        raise InvalidMetadataFile(f"Could not parse {path} due to: {e}") from e
+
+    # Ensure this version is not too new.
+    try:
+        layout_version = int(spec_dict.get("buildcache_layout_version", 0))
+    except ValueError as e:
+        raise InvalidMetadataFile("Could not parse layout version") from e
+
+    if layout_version > max_supported_layout:
+        raise InvalidMetadataFile(
+            f"Layout version {layout_version} is too new for this version of Spack"
+        )
+
+    return spec_dict, layout_version
+
+
 def download_tarball(spec, unsigned=False, mirrors_for_spec=None):
     """
     Download binary tarball for given package into stage area, returning
@@ -1652,6 +1693,18 @@ def download_tarball(spec, unsigned=False, mirrors_for_spec=None):
            try:
                local_specfile_stage.fetch()
                local_specfile_stage.check()
+               try:
+                   _get_valid_spec_file(
+                       local_specfile_stage.save_filename,
+                       CURRENT_BUILD_CACHE_LAYOUT_VERSION,
+                   )
+               except InvalidMetadataFile as e:
+                   tty.warn(
+                       f"Ignoring binary package for {spec.name}/{spec.dag_hash()[:7]} "
+                       f"from {mirror} due to invalid metadata file: {e}"
+                   )
+                   local_specfile_stage.destroy()
+                   continue
            except Exception:
                continue
            local_specfile_stage.cache_local()
@@ -1674,14 +1727,26 @@ def download_tarball(spec, unsigned=False, mirrors_for_spec=None):

        else:
            ext = "json.sig" if try_signed else "json"
-           specfile_path = url_util.join(mirror, _build_cache_relative_path, specfile_prefix)
+           specfile_path = url_util.join(mirror, BUILD_CACHE_RELATIVE_PATH, specfile_prefix)
            specfile_url = f"{specfile_path}.{ext}"
-           spackfile_url = url_util.join(mirror, _build_cache_relative_path, tarball)
+           spackfile_url = url_util.join(mirror, BUILD_CACHE_RELATIVE_PATH, tarball)
            local_specfile_stage = try_fetch(specfile_url)
            if local_specfile_stage:
                local_specfile_path = local_specfile_stage.save_filename
                signature_verified = False

+               try:
+                   _get_valid_spec_file(
+                       local_specfile_path, CURRENT_BUILD_CACHE_LAYOUT_VERSION
+                   )
+               except InvalidMetadataFile as e:
+                   tty.warn(
+                       f"Ignoring binary package for {spec.name}/{spec.dag_hash()[:7]} "
+                       f"from {mirror} due to invalid metadata file: {e}"
+                   )
+                   local_specfile_stage.destroy()
+                   continue
+
                if try_signed and not unsigned:
                    # If we found a signed specfile at the root, try to verify
                    # the signature immediately. We will not download the
@@ -2001,24 +2066,16 @@ def extract_tarball(spec, download_result, unsigned=False, force=False, timer=ti
        )

    specfile_path = download_result["specfile_stage"].save_filename
-   with open(specfile_path, "r") as inputfile:
-       content = inputfile.read()
-   if specfile_path.endswith(".json.sig"):
-       spec_dict = Spec.extract_json_from_clearsig(content)
-   else:
-       spec_dict = sjson.load(content)
+   spec_dict, layout_version = _get_valid_spec_file(
+       specfile_path, CURRENT_BUILD_CACHE_LAYOUT_VERSION
+   )

    bchecksum = spec_dict["binary_cache_checksum"]

    filename = download_result["tarball_stage"].save_filename
    signature_verified = download_result["signature_verified"]
    tmpdir = None

-   if (
-       "buildcache_layout_version" not in spec_dict
-       or int(spec_dict["buildcache_layout_version"]) < 1
-   ):
+   if layout_version == 0:
        # Handle the older buildcache layout where the .spack file
        # contains a spec json, maybe an .asc file (signature),
        # and another tarball containing the actual install tree.
@@ -2029,7 +2086,7 @@ def extract_tarball(spec, download_result, unsigned=False, force=False, timer=ti
            _delete_staged_downloads(download_result)
            shutil.rmtree(tmpdir)
            raise e
-   else:
+   elif layout_version == 1:
        # Newer buildcache layout: the .spack file contains just
        # in the install tree, the signature, if it exists, is
        # wrapped around the spec.json at the root. If sig verify
@@ -2053,7 +2110,6 @@ def extract_tarball(spec, download_result, unsigned=False, force=False, timer=ti
        raise NoChecksumException(
            tarfile_path, size, contents, "sha256", expected, local_checksum
        )

    try:
        with closing(tarfile.open(tarfile_path, "r")) as tar:
            # Remove install prefix from tarfil to extract directly into spec.prefix
@@ -2184,10 +2240,10 @@ def try_direct_fetch(spec, mirrors=None):

    for mirror in binary_mirrors:
        buildcache_fetch_url_json = url_util.join(
-           mirror.fetch_url, _build_cache_relative_path, specfile_name
+           mirror.fetch_url, BUILD_CACHE_RELATIVE_PATH, specfile_name
        )
        buildcache_fetch_url_signed_json = url_util.join(
-           mirror.fetch_url, _build_cache_relative_path, signed_specfile_name
+           mirror.fetch_url, BUILD_CACHE_RELATIVE_PATH, signed_specfile_name
        )
        try:
            _, _, fs = web_util.read_from_url(buildcache_fetch_url_signed_json)
@@ -2292,7 +2348,7 @@ def get_keys(install=False, trust=False, force=False, mirrors=None):
    for mirror in mirror_collection.values():
        fetch_url = mirror.fetch_url
        keys_url = url_util.join(
-           fetch_url, _build_cache_relative_path, _build_cache_keys_relative_path
+           fetch_url, BUILD_CACHE_RELATIVE_PATH, BUILD_CACHE_KEYS_RELATIVE_PATH
        )
        keys_index = url_util.join(keys_url, "index.json")

@@ -2357,7 +2413,7 @@ def push_keys(*mirrors, **kwargs):
    for mirror in mirrors:
        push_url = getattr(mirror, "push_url", mirror)
        keys_url = url_util.join(
-           push_url, _build_cache_relative_path, _build_cache_keys_relative_path
+           push_url, BUILD_CACHE_RELATIVE_PATH, BUILD_CACHE_KEYS_RELATIVE_PATH
        )
        keys_local = url_util.local_file_path(keys_url)

@@ -2495,11 +2551,11 @@ def download_buildcache_entry(file_descriptions, mirror_url=None):
        )

    if mirror_url:
-       mirror_root = os.path.join(mirror_url, _build_cache_relative_path)
+       mirror_root = os.path.join(mirror_url, BUILD_CACHE_RELATIVE_PATH)
        return _download_buildcache_entry(mirror_root, file_descriptions)

    for mirror in spack.mirror.MirrorCollection(binary=True).values():
-       mirror_root = os.path.join(mirror.fetch_url, _build_cache_relative_path)
+       mirror_root = os.path.join(mirror.fetch_url, BUILD_CACHE_RELATIVE_PATH)

        if _download_buildcache_entry(mirror_root, file_descriptions):
            return True
@@ -2590,7 +2646,7 @@ def __init__(self, url, local_hash, urlopen=web_util.urlopen):

    def get_remote_hash(self):
        # Failure to fetch index.json.hash is not fatal
-       url_index_hash = url_util.join(self.url, _build_cache_relative_path, "index.json.hash")
+       url_index_hash = url_util.join(self.url, BUILD_CACHE_RELATIVE_PATH, "index.json.hash")
        try:
            response = self.urlopen(urllib.request.Request(url_index_hash, headers=self.headers))
        except urllib.error.URLError:
@@ -2611,7 +2667,7 @@ def conditional_fetch(self) -> FetchIndexResult:
            return FetchIndexResult(etag=None, hash=None, data=None, fresh=True)

        # Otherwise, download index.json
-       url_index = url_util.join(self.url, _build_cache_relative_path, "index.json")
+       url_index = url_util.join(self.url, BUILD_CACHE_RELATIVE_PATH, "index.json")

        try:
            response = self.urlopen(urllib.request.Request(url_index, headers=self.headers))
@@ -2655,7 +2711,7 @@ def __init__(self, url, etag, urlopen=web_util.urlopen):

    def conditional_fetch(self) -> FetchIndexResult:
        # Just do a conditional fetch immediately
-       url = url_util.join(self.url, _build_cache_relative_path, "index.json")
+       url = url_util.join(self.url, BUILD_CACHE_RELATIVE_PATH, "index.json")
        headers = {
            "User-Agent": web_util.SPACK_USER_AGENT,
            "If-None-Match": '"{}"'.format(self.etag),
Second changed file (the build cache tests):

@@ -4,7 +4,9 @@
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 import filecmp
 import glob
+import gzip
 import io
+import json
 import os
 import platform
 import sys
@@ -1112,3 +1114,77 @@ def test_tarfile_of_spec_prefix(tmpdir):
     assert tar.getmember(f"{expected_prefix}/b_directory/file").isreg()
     assert tar.getmember(f"{expected_prefix}/c_directory").isdir()
     assert tar.getmember(f"{expected_prefix}/c_directory/file").isreg()
+
+
+@pytest.mark.parametrize("layout,expect_success", [(None, True), (1, True), (2, False)])
+def test_get_valid_spec_file(tmp_path, layout, expect_success):
+    # Test reading a spec.json file that does not specify a layout version.
+    spec_dict = Spec("example").to_dict()
+    path = tmp_path / "spec.json"
+    effective_layout = layout or 0  # If not specified it should be 0
+
+    # Add a layout version
+    if layout is not None:
+        spec_dict["buildcache_layout_version"] = layout
+
+    # Save to file
+    with open(path, "w") as f:
+        json.dump(spec_dict, f)
+
+    try:
+        spec_dict_disk, layout_disk = bindist._get_valid_spec_file(
+            str(path), max_supported_layout=1
+        )
+        assert expect_success
+        assert spec_dict_disk == spec_dict
+        assert layout_disk == effective_layout
+    except bindist.InvalidMetadataFile:
+        assert not expect_success
+
+
+def test_get_valid_spec_file_doesnt_exist(tmp_path):
+    with pytest.raises(bindist.InvalidMetadataFile, match="No such file"):
+        bindist._get_valid_spec_file(str(tmp_path / "no-such-file"), max_supported_layout=1)
+
+
+def test_get_valid_spec_file_gzipped(tmp_path):
+    # Create a gzipped file, contents don't matter
+    path = tmp_path / "spec.json.gz"
+    with gzip.open(path, "wb") as f:
+        f.write(b"hello")
+    with pytest.raises(
+        bindist.InvalidMetadataFile, match="Compressed spec files are not supported"
+    ):
+        bindist._get_valid_spec_file(str(path), max_supported_layout=1)
+
+
+@pytest.mark.parametrize("filename", ["spec.json", "spec.json.sig"])
+def test_get_valid_spec_file_no_json(tmp_path, filename):
+    tmp_path.joinpath(filename).write_text("not json")
+    with pytest.raises(bindist.InvalidMetadataFile):
+        bindist._get_valid_spec_file(str(tmp_path / filename), max_supported_layout=1)
+
+
+def test_download_tarball_with_unsupported_layout_fails(tmp_path, mutable_config, capsys):
+    layout_version = bindist.CURRENT_BUILD_CACHE_LAYOUT_VERSION + 1
+    spec = Spec("gmake@4.4.1%gcc@13.1.0 arch=linux-ubuntu23.04-zen2")
+    spec._mark_concrete()
+    spec_dict = spec.to_dict()
+    spec_dict["buildcache_layout_version"] = layout_version
+
+    # Setup a basic local build cache structure
+    path = (
+        tmp_path / bindist.build_cache_relative_path() / bindist.tarball_name(spec, ".spec.json")
+    )
+    path.parent.mkdir(parents=True)
+    with open(path, "w") as f:
+        json.dump(spec_dict, f)
+
+    # Configure as a mirror.
+    mirror_cmd("add", "test-mirror", str(tmp_path))
+
+    # Shouldn't be able "download" this.
+    assert bindist.download_tarball(spec, unsigned=True) is None
+
+    # And there should be a warning about an unsupported layout version.
+    assert f"Layout version {layout_version} is too new" in capsys.readouterr().err