buildcache: skip unrecognized metadata files (#40941)

This commit improves forward compatibility of Spack with newer build cache metadata formats.

Before this commit, invalid or unrecognized metadata files were fatal errors; now they just cause
the offending mirror to be skipped.

Co-authored-by: Harmen Stoppels <me@harmenstoppels.nl>
Scott Wittenburg 2023-11-09 06:30:41 -07:00 committed by Todd Gamblin
parent 1baf712b87
commit 67f20c3e5c
2 changed files with 163 additions and 31 deletions
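
In outline, the new validation reads the raw bytes of a candidate spec file, rejects gzip-compressed data, parses the JSON, and refuses any `buildcache_layout_version` newer than the layout this Spack understands. A minimal standalone sketch of that rule — names here are illustrative, and the real implementation (`_get_valid_spec_file` in the diff below) additionally handles clearsigned `.json.sig` files:

import json

GZIP_MAGIC = b"\x1f\x8b"


def check_layout(raw: bytes, max_supported: int = 1) -> dict:
    """Reject buildcache metadata this client cannot safely interpret."""
    if raw[:2] == GZIP_MAGIC:
        raise ValueError("compressed spec files are not supported")
    spec_dict = json.loads(raw.decode("utf-8"))
    # Metadata written before the version field existed counts as layout 0.
    version = int(spec_dict.get("buildcache_layout_version", 0))
    if version > max_supported:
        raise ValueError(f"layout version {version} is too new")
    return spec_dict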


@@ -66,8 +66,9 @@
 from spack.stage import Stage
 from spack.util.executable import which

-_build_cache_relative_path = "build_cache"
-_build_cache_keys_relative_path = "_pgp"
+BUILD_CACHE_RELATIVE_PATH = "build_cache"
+BUILD_CACHE_KEYS_RELATIVE_PATH = "_pgp"
+CURRENT_BUILD_CACHE_LAYOUT_VERSION = 1


 class BuildCacheDatabase(spack_db.Database):
@@ -481,7 +482,7 @@ def _fetch_and_cache_index(self, mirror_url, cache_entry={}):
         scheme = urllib.parse.urlparse(mirror_url).scheme

         if scheme != "oci" and not web_util.url_exists(
-            url_util.join(mirror_url, _build_cache_relative_path, "index.json")
+            url_util.join(mirror_url, BUILD_CACHE_RELATIVE_PATH, "index.json")
         ):
             return False
@@ -600,6 +601,10 @@ def __init__(self, msg):
         super().__init__(msg)


+class InvalidMetadataFile(spack.error.SpackError):
+    pass
+
+
 class UnsignedPackageException(spack.error.SpackError):
     """
     Raised if installation of unsigned package is attempted without
@@ -614,11 +619,11 @@ def compute_hash(data):


 def build_cache_relative_path():
-    return _build_cache_relative_path
+    return BUILD_CACHE_RELATIVE_PATH


 def build_cache_keys_relative_path():
-    return _build_cache_keys_relative_path
+    return BUILD_CACHE_KEYS_RELATIVE_PATH


 def build_cache_prefix(prefix):
@@ -1401,7 +1406,7 @@ def _build_tarball_in_stage_dir(spec: Spec, out_url: str, stage_dir: str, option
             spec_dict = sjson.load(content)
         else:
             raise ValueError("{0} not a valid spec file type".format(spec_file))
-    spec_dict["buildcache_layout_version"] = 1
+    spec_dict["buildcache_layout_version"] = CURRENT_BUILD_CACHE_LAYOUT_VERSION
     spec_dict["binary_cache_checksum"] = {"hash_algorithm": "sha256", "hash": checksum}

     with open(specfile_path, "w") as outfile:
@@ -1560,6 +1565,42 @@ def _delete_staged_downloads(download_result):
     download_result["specfile_stage"].destroy()


+def _get_valid_spec_file(path: str, max_supported_layout: int) -> Tuple[Dict, int]:
+    """Read and validate a spec file, returning the spec dict with its layout version, or raising
+    InvalidMetadataFile if invalid."""
+    try:
+        with open(path, "rb") as f:
+            binary_content = f.read()
+    except OSError:
+        raise InvalidMetadataFile(f"No such file: {path}")
+
+    # In the future we may support transparently decompressing compressed spec files.
+    if binary_content[:2] == b"\x1f\x8b":
+        raise InvalidMetadataFile("Compressed spec files are not supported")
+
+    try:
+        as_string = binary_content.decode("utf-8")
+        if path.endswith(".json.sig"):
+            spec_dict = Spec.extract_json_from_clearsig(as_string)
+        else:
+            spec_dict = json.loads(as_string)
+    except Exception as e:
+        raise InvalidMetadataFile(f"Could not parse {path} due to: {e}") from e
+
+    # Ensure this version is not too new.
+    try:
+        layout_version = int(spec_dict.get("buildcache_layout_version", 0))
+    except ValueError as e:
+        raise InvalidMetadataFile("Could not parse layout version") from e
+
+    if layout_version > max_supported_layout:
+        raise InvalidMetadataFile(
+            f"Layout version {layout_version} is too new for this version of Spack"
+        )
+
+    return spec_dict, layout_version
+
+
 def download_tarball(spec, unsigned=False, mirrors_for_spec=None):
     """
     Download binary tarball for given package into stage area, returning
@@ -1652,6 +1693,18 @@ def download_tarball(spec, unsigned=False, mirrors_for_spec=None):
             try:
                 local_specfile_stage.fetch()
                 local_specfile_stage.check()
+                try:
+                    _get_valid_spec_file(
+                        local_specfile_stage.save_filename,
+                        CURRENT_BUILD_CACHE_LAYOUT_VERSION,
+                    )
+                except InvalidMetadataFile as e:
+                    tty.warn(
+                        f"Ignoring binary package for {spec.name}/{spec.dag_hash()[:7]} "
+                        f"from {mirror} due to invalid metadata file: {e}"
+                    )
+                    local_specfile_stage.destroy()
+                    continue
             except Exception:
                 continue
             local_specfile_stage.cache_local()
@@ -1674,14 +1727,26 @@ def download_tarball(spec, unsigned=False, mirrors_for_spec=None):
         else:
             ext = "json.sig" if try_signed else "json"
-            specfile_path = url_util.join(mirror, _build_cache_relative_path, specfile_prefix)
+            specfile_path = url_util.join(mirror, BUILD_CACHE_RELATIVE_PATH, specfile_prefix)
             specfile_url = f"{specfile_path}.{ext}"
-            spackfile_url = url_util.join(mirror, _build_cache_relative_path, tarball)
+            spackfile_url = url_util.join(mirror, BUILD_CACHE_RELATIVE_PATH, tarball)
             local_specfile_stage = try_fetch(specfile_url)
             if local_specfile_stage:
                 local_specfile_path = local_specfile_stage.save_filename
                 signature_verified = False

+                try:
+                    _get_valid_spec_file(
+                        local_specfile_path, CURRENT_BUILD_CACHE_LAYOUT_VERSION
+                    )
+                except InvalidMetadataFile as e:
+                    tty.warn(
+                        f"Ignoring binary package for {spec.name}/{spec.dag_hash()[:7]} "
+                        f"from {mirror} due to invalid metadata file: {e}"
+                    )
+                    local_specfile_stage.destroy()
+                    continue
+
                 if try_signed and not unsigned:
                     # If we found a signed specfile at the root, try to verify
                     # the signature immediately. We will not download the
@@ -2001,24 +2066,16 @@ def extract_tarball(spec, download_result, unsigned=False, force=False, timer=ti
         )

     specfile_path = download_result["specfile_stage"].save_filename
-
-    with open(specfile_path, "r") as inputfile:
-        content = inputfile.read()
-    if specfile_path.endswith(".json.sig"):
-        spec_dict = Spec.extract_json_from_clearsig(content)
-    else:
-        spec_dict = sjson.load(content)
-
+    spec_dict, layout_version = _get_valid_spec_file(
+        specfile_path, CURRENT_BUILD_CACHE_LAYOUT_VERSION
+    )
     bchecksum = spec_dict["binary_cache_checksum"]

     filename = download_result["tarball_stage"].save_filename
     signature_verified = download_result["signature_verified"]
     tmpdir = None

-    if (
-        "buildcache_layout_version" not in spec_dict
-        or int(spec_dict["buildcache_layout_version"]) < 1
-    ):
+    if layout_version == 0:
         # Handle the older buildcache layout where the .spack file
         # contains a spec json, maybe an .asc file (signature),
         # and another tarball containing the actual install tree.
@@ -2029,7 +2086,7 @@ def extract_tarball(spec, download_result, unsigned=False, force=False, timer=ti
             _delete_staged_downloads(download_result)
             shutil.rmtree(tmpdir)
             raise e
-    else:
+    elif layout_version == 1:
         # Newer buildcache layout: the .spack file contains just
         # in the install tree, the signature, if it exists, is
         # wrapped around the spec.json at the root. If sig verify
@@ -2053,7 +2110,6 @@ def extract_tarball(spec, download_result, unsigned=False, force=False, timer=ti
                 raise NoChecksumException(
                     tarfile_path, size, contents, "sha256", expected, local_checksum
                 )
-
     try:
         with closing(tarfile.open(tarfile_path, "r")) as tar:
             # Remove install prefix from tarfil to extract directly into spec.prefix
@@ -2184,10 +2240,10 @@ def try_direct_fetch(spec, mirrors=None):
     for mirror in binary_mirrors:
         buildcache_fetch_url_json = url_util.join(
-            mirror.fetch_url, _build_cache_relative_path, specfile_name
+            mirror.fetch_url, BUILD_CACHE_RELATIVE_PATH, specfile_name
         )
         buildcache_fetch_url_signed_json = url_util.join(
-            mirror.fetch_url, _build_cache_relative_path, signed_specfile_name
+            mirror.fetch_url, BUILD_CACHE_RELATIVE_PATH, signed_specfile_name
         )
         try:
             _, _, fs = web_util.read_from_url(buildcache_fetch_url_signed_json)
@@ -2292,7 +2348,7 @@ def get_keys(install=False, trust=False, force=False, mirrors=None):
     for mirror in mirror_collection.values():
         fetch_url = mirror.fetch_url
         keys_url = url_util.join(
-            fetch_url, _build_cache_relative_path, _build_cache_keys_relative_path
+            fetch_url, BUILD_CACHE_RELATIVE_PATH, BUILD_CACHE_KEYS_RELATIVE_PATH
         )
         keys_index = url_util.join(keys_url, "index.json")
@@ -2357,7 +2413,7 @@ def push_keys(*mirrors, **kwargs):
     for mirror in mirrors:
         push_url = getattr(mirror, "push_url", mirror)
         keys_url = url_util.join(
-            push_url, _build_cache_relative_path, _build_cache_keys_relative_path
+            push_url, BUILD_CACHE_RELATIVE_PATH, BUILD_CACHE_KEYS_RELATIVE_PATH
         )
         keys_local = url_util.local_file_path(keys_url)
@@ -2495,11 +2551,11 @@ def download_buildcache_entry(file_descriptions, mirror_url=None):
         )

     if mirror_url:
-        mirror_root = os.path.join(mirror_url, _build_cache_relative_path)
+        mirror_root = os.path.join(mirror_url, BUILD_CACHE_RELATIVE_PATH)
         return _download_buildcache_entry(mirror_root, file_descriptions)

     for mirror in spack.mirror.MirrorCollection(binary=True).values():
-        mirror_root = os.path.join(mirror.fetch_url, _build_cache_relative_path)
+        mirror_root = os.path.join(mirror.fetch_url, BUILD_CACHE_RELATIVE_PATH)
         if _download_buildcache_entry(mirror_root, file_descriptions):
             return True
@@ -2590,7 +2646,7 @@ def __init__(self, url, local_hash, urlopen=web_util.urlopen):

     def get_remote_hash(self):
         # Failure to fetch index.json.hash is not fatal
-        url_index_hash = url_util.join(self.url, _build_cache_relative_path, "index.json.hash")
+        url_index_hash = url_util.join(self.url, BUILD_CACHE_RELATIVE_PATH, "index.json.hash")
         try:
             response = self.urlopen(urllib.request.Request(url_index_hash, headers=self.headers))
         except urllib.error.URLError:
@@ -2611,7 +2667,7 @@ def conditional_fetch(self) -> FetchIndexResult:
             return FetchIndexResult(etag=None, hash=None, data=None, fresh=True)

         # Otherwise, download index.json
-        url_index = url_util.join(self.url, _build_cache_relative_path, "index.json")
+        url_index = url_util.join(self.url, BUILD_CACHE_RELATIVE_PATH, "index.json")

         try:
             response = self.urlopen(urllib.request.Request(url_index, headers=self.headers))
@@ -2655,7 +2711,7 @@ def __init__(self, url, etag, urlopen=web_util.urlopen):

     def conditional_fetch(self) -> FetchIndexResult:
         # Just do a conditional fetch immediately
-        url = url_util.join(self.url, _build_cache_relative_path, "index.json")
+        url = url_util.join(self.url, BUILD_CACHE_RELATIVE_PATH, "index.json")
         headers = {
             "User-Agent": web_util.SPACK_USER_AGENT,
             "If-None-Match": '"{}"'.format(self.etag),

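With the library changes above and the tests below, a mirror serving unreadable metadata no longer aborts the command: `download_tarball` warns and moves on to the next mirror, and `extract_tarball` dispatches on the validated layout version. Based on the `tty.warn` call above, the message a user sees would look roughly like this (spec hash and mirror URL are made up):

==> Warning: Ignoring binary package for gmake/abc1234 from file:///path/to/mirror due to invalid metadata file: Layout version 2 is too new for this version of Spack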

@@ -4,7 +4,9 @@
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 import filecmp
 import glob
+import gzip
 import io
+import json
 import os
 import platform
 import sys
@@ -1112,3 +1114,77 @@ def test_tarfile_of_spec_prefix(tmpdir):
     assert tar.getmember(f"{expected_prefix}/b_directory/file").isreg()
     assert tar.getmember(f"{expected_prefix}/c_directory").isdir()
     assert tar.getmember(f"{expected_prefix}/c_directory/file").isreg()
+
+
+@pytest.mark.parametrize("layout,expect_success", [(None, True), (1, True), (2, False)])
+def test_get_valid_spec_file(tmp_path, layout, expect_success):
+    # Test reading a spec.json file that does not specify a layout version.
+    spec_dict = Spec("example").to_dict()
+    path = tmp_path / "spec.json"
+    effective_layout = layout or 0  # If not specified it should be 0
+
+    # Add a layout version
+    if layout is not None:
+        spec_dict["buildcache_layout_version"] = layout
+
+    # Save to file
+    with open(path, "w") as f:
+        json.dump(spec_dict, f)
+
+    try:
+        spec_dict_disk, layout_disk = bindist._get_valid_spec_file(
+            str(path), max_supported_layout=1
+        )
+        assert expect_success
+        assert spec_dict_disk == spec_dict
+        assert layout_disk == effective_layout
+    except bindist.InvalidMetadataFile:
+        assert not expect_success
+
+
+def test_get_valid_spec_file_doesnt_exist(tmp_path):
+    with pytest.raises(bindist.InvalidMetadataFile, match="No such file"):
+        bindist._get_valid_spec_file(str(tmp_path / "no-such-file"), max_supported_layout=1)
+
+
+def test_get_valid_spec_file_gzipped(tmp_path):
+    # Create a gzipped file, contents don't matter
+    path = tmp_path / "spec.json.gz"
+    with gzip.open(path, "wb") as f:
+        f.write(b"hello")
+    with pytest.raises(
+        bindist.InvalidMetadataFile, match="Compressed spec files are not supported"
+    ):
+        bindist._get_valid_spec_file(str(path), max_supported_layout=1)
+
+
+@pytest.mark.parametrize("filename", ["spec.json", "spec.json.sig"])
+def test_get_valid_spec_file_no_json(tmp_path, filename):
+    tmp_path.joinpath(filename).write_text("not json")
+    with pytest.raises(bindist.InvalidMetadataFile):
+        bindist._get_valid_spec_file(str(tmp_path / filename), max_supported_layout=1)
+
+
+def test_download_tarball_with_unsupported_layout_fails(tmp_path, mutable_config, capsys):
+    layout_version = bindist.CURRENT_BUILD_CACHE_LAYOUT_VERSION + 1
+    spec = Spec("gmake@4.4.1%gcc@13.1.0 arch=linux-ubuntu23.04-zen2")
+    spec._mark_concrete()
+    spec_dict = spec.to_dict()
+    spec_dict["buildcache_layout_version"] = layout_version
+
+    # Setup a basic local build cache structure
+    path = (
+        tmp_path / bindist.build_cache_relative_path() / bindist.tarball_name(spec, ".spec.json")
+    )
+    path.parent.mkdir(parents=True)
+    with open(path, "w") as f:
+        json.dump(spec_dict, f)
+
+    # Configure as a mirror.
+    mirror_cmd("add", "test-mirror", str(tmp_path))
+
+    # Shouldn't be able to "download" this.
+    assert bindist.download_tarball(spec, unsigned=True) is None
+
+    # And there should be a warning about an unsupported layout version.
+    assert f"Layout version {layout_version} is too new" in capsys.readouterr().err
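
For a quick manual check of the new helper, something like the following should reproduce the rejection outside the test suite (assumes a Spack checkout is importable; the temporary path is arbitrary):

import json
import os
import tempfile

import spack.binary_distribution as bindist

# Write a metadata file that claims a layout one version newer than supported.
path = os.path.join(tempfile.mkdtemp(), "spec.json")
with open(path, "w") as f:
    json.dump({"buildcache_layout_version": bindist.CURRENT_BUILD_CACHE_LAYOUT_VERSION + 1}, f)

try:
    bindist._get_valid_spec_file(path, bindist.CURRENT_BUILD_CACHE_LAYOUT_VERSION)
except bindist.InvalidMetadataFile as e:
    print(e)  # Layout version 2 is too new for this version of Spack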