Binary caching: use full hashes (#19209)

* "spack install" now has a "--require-full-hash-match" option, which
  forces Spack to skip an available binary package when the full hash
  doesn't match. Normally only a DAG-hash match is required, which
  ensures equivalent Specs, but does not account for changing logic
  inside the associated package.
* Add a local binary cache index which tracks specs that have a binary
  install available in a remote binary cache. It is updated with
  "spack buildcache list" or for a given spec when a binary package
  is retrieved for that Spec.
This commit is contained in:
Scott Wittenburg 2020-10-30 13:53:33 -06:00 committed by GitHub
parent 124d654337
commit 31f57e56bb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 809 additions and 172 deletions

View file

@ -18,8 +18,9 @@
import json
from six.moves.urllib.error import URLError
from six.moves.urllib.error import URLError, HTTPError
import llnl.util.lang
import llnl.util.tty as tty
from llnl.util.filesystem import mkdirp
@ -27,6 +28,7 @@
import spack.config as config
import spack.database as spack_db
import spack.fetch_strategy as fs
import spack.util.file_cache as file_cache
import spack.relocate as relocate
import spack.util.gpg
import spack.util.spack_json as sjson
@ -37,23 +39,422 @@
from spack.spec import Spec
from spack.stage import Stage
#: default root, relative to the Spack install path
default_binary_index_root = os.path.join(spack.paths.opt_path, 'spack')
_build_cache_relative_path = 'build_cache'
_build_cache_keys_relative_path = '_pgp'
BUILD_CACHE_INDEX_TEMPLATE = '''
<html>
<head>
<title>{title}</title>
</head>
<body>
<ul>
{path_list}
</ul>
</body>
</html>
'''
BUILD_CACHE_INDEX_ENTRY_TEMPLATE = ' <li><a href="{path}">{path}</a></li>'
class BinaryCacheIndex(object):
"""
The BinaryCacheIndex tracks what specs are available on (usually remote)
binary caches.
This index is "best effort", in the sense that whenever we don't find
what we're looking for here, we will attempt to fetch it directly from
configured mirrors anyway. Thus, it has the potential to speed things
up, but cache misses shouldn't break any spack functionality.
At the moment, everything in this class is initialized as lazily as
possible, so that it avoids slowing anything in spack down until
absolutely necessary.
TODO: What's the cost if, e.g., we realize in the middle of a spack
install that the cache is out of date, and we fetch directly? Does it
mean we should have paid the price to update the cache earlier?
"""
def __init__(self, cache_root=None):
self._cache_root = cache_root or default_binary_index_root
self._index_cache_root = os.path.join(self._cache_root, 'indices')
# the key associated with the serialized _local_index_cache
self._index_contents_key = 'contents.json'
# a FileCache instance storing copies of remote binary cache indices
self._index_file_cache = None
# stores a map of mirror URL to index hash and cache key (index path)
self._local_index_cache = None
# hashes of remote indices already ingested into the concrete spec
# cache (_mirrors_for_spec)
self._specs_already_associated = set()
# _mirrors_for_spec is a dictionary mapping DAG hashes to lists of
# entries indicating mirrors where that concrete spec can be found.
# Each entry is a dictionary consisting of:
#
# - the mirror where the spec is, keyed by ``mirror_url``
# - the concrete spec itself, keyed by ``spec`` (including the
# full hash, since the dag hash may match but we want to
# use the updated source if available)
self._mirrors_for_spec = {}
def _init_local_index_cache(self):
if not self._index_file_cache:
self._index_file_cache = file_cache.FileCache(
self._index_cache_root)
cache_key = self._index_contents_key
self._index_file_cache.init_entry(cache_key)
cache_path = self._index_file_cache.cache_path(cache_key)
self._local_index_cache = {}
if os.path.isfile(cache_path):
with self._index_file_cache.read_transaction(
cache_key) as cache_file:
self._local_index_cache = json.load(cache_file)
def clear(self):
""" For testing purposes we need to be able to empty the cache and
clear associated data structures. """
if self._index_file_cache:
self._index_file_cache.destroy()
self._index_file_cache = None
self._local_index_cache = None
self._specs_already_associated = set()
self._mirrors_for_spec = {}
def _write_local_index_cache(self):
self._init_local_index_cache()
cache_key = self._index_contents_key
with self._index_file_cache.write_transaction(cache_key) as (old, new):
json.dump(self._local_index_cache, new)
def regenerate_spec_cache(self, clear_existing=False):
""" Populate the local cache of concrete specs (``_mirrors_for_spec``)
from the locally cached buildcache index files. This is essentially a
no-op if it has already been done, as we keep track of the index
hashes for which we have already associated the built specs. """
self._init_local_index_cache()
if clear_existing:
self._specs_already_associated = set()
self._mirrors_for_spec = {}
for mirror_url in self._local_index_cache:
cache_entry = self._local_index_cache[mirror_url]
cached_index_path = cache_entry['index_path']
cached_index_hash = cache_entry['index_hash']
if cached_index_hash not in self._specs_already_associated:
self._associate_built_specs_with_mirror(cached_index_path,
mirror_url)
self._specs_already_associated.add(cached_index_hash)
def _associate_built_specs_with_mirror(self, cache_key, mirror_url):
tmpdir = tempfile.mkdtemp()
try:
db_root_dir = os.path.join(tmpdir, 'db_root')
db = spack_db.Database(None, db_dir=db_root_dir,
enable_transaction_locking=False)
self._index_file_cache.init_entry(cache_key)
cache_path = self._index_file_cache.cache_path(cache_key)
with self._index_file_cache.read_transaction(cache_key):
db._read_from_file(cache_path)
spec_list = db.query_local(installed=False)
for indexed_spec in spec_list:
dag_hash = indexed_spec.dag_hash()
full_hash = indexed_spec._full_hash
if dag_hash not in self._mirrors_for_spec:
self._mirrors_for_spec[dag_hash] = []
for entry in self._mirrors_for_spec[dag_hash]:
# A binary mirror can only have one spec per DAG hash, so
# if we already have an entry under this DAG hash for this
# mirror url, we may need to replace the spec associated
# with it (but only if it has a different full_hash).
if entry['mirror_url'] == mirror_url:
if full_hash and full_hash != entry['spec']._full_hash:
entry['spec'] = indexed_spec
break
else:
self._mirrors_for_spec[dag_hash].append({
"mirror_url": mirror_url,
"spec": indexed_spec,
})
finally:
shutil.rmtree(tmpdir)
def get_all_built_specs(self):
spec_list = []
for dag_hash in self._mirrors_for_spec:
# in the absence of further information, all concrete specs
# with the same DAG hash are equivalent, so we can just
# return the first one in the list.
if len(self._mirrors_for_spec[dag_hash]) > 0:
spec_list.append(self._mirrors_for_spec[dag_hash][0]['spec'])
return spec_list
def find_built_spec(self, spec):
"""Look in our cache for the built spec corresponding to ``spec``.
If the spec can be found among the configured binary mirrors, a
list is returned that contains the concrete spec and the mirror url
of each mirror where it can be found. Otherwise, ``None`` is
returned.
This method does not trigger reading anything from remote mirrors, but
rather just checks if the concrete spec is found within the cache.
The cache can be updated by calling ``update()`` on the cache.
Args:
spec (Spec): Concrete spec to find
Returns:
An list of objects containing the found specs and mirror url where
each can be found, e.g.:
.. code-block:: python
[
{
"spec": <concrete-spec>,
"mirror_url": <mirror-root-url>
}
]
"""
self.regenerate_spec_cache()
find_hash = spec.dag_hash()
if find_hash not in self._mirrors_for_spec:
return None
return self._mirrors_for_spec[find_hash]
def update_spec(self, spec, found_list):
"""
Take list of {'mirror_url': m, 'spec': s} objects and update the local
built_spec_cache
"""
spec_dag_hash = spec.dag_hash()
if spec_dag_hash not in self._mirrors_for_spec:
self._mirrors_for_spec[spec_dag_hash] = found_list
else:
current_list = self._mirrors_for_spec[spec_dag_hash]
for new_entry in found_list:
for cur_entry in current_list:
if new_entry['mirror_url'] == cur_entry['mirror_url']:
cur_entry['spec'] = new_entry['spec']
break
else:
current_list.append = {
'mirror_url': new_entry['mirror_url'],
'spec': new_entry['spec'],
}
def update(self):
""" Make sure local cache of buildcache index files is up to date.
If the same mirrors are configured as the last time this was called
and none of the remote buildcache indices have changed, calling this
method will only result in fetching the index hash from each mirror
to confirm it is the same as what is stored locally. Otherwise, the
buildcache ``index.json`` and ``index.json.hash`` files are retrieved
from each configured mirror and stored locally (both in memory and
on disk under ``_index_cache_root``). """
self._init_local_index_cache()
mirrors = spack.mirror.MirrorCollection()
configured_mirror_urls = [m.fetch_url for m in mirrors.values()]
items_to_remove = []
spec_cache_clear_needed = False
spec_cache_regenerate_needed = not self._mirrors_for_spec
# First compare the mirror urls currently present in the cache to the
# configured mirrors. If we have a cached index for a mirror which is
# no longer configured, we should remove it from the cache. For any
# cached indices corresponding to currently configured mirrors, we need
# to check if the cache is still good, or needs to be updated.
# Finally, if there are configured mirrors for which we don't have a
# cache entry, we need to fetch and cache the indices from those
# mirrors.
# If, during this process, we find that any mirrors for which we
# already have entries have either been removed, or their index
# hash has changed, then our concrete spec cache (_mirrors_for_spec)
# likely has entries that need to be removed, so we will clear it
# and regenerate that data structure.
# If, during this process, we find that there are new mirrors for
# which do not yet have an entry in our index cache, then we simply
# need to regenerate the concrete spec cache, but do not need to
# clear it first.
# Otherwise the concrete spec cache should not need to be updated at
# all.
for cached_mirror_url in self._local_index_cache:
cache_entry = self._local_index_cache[cached_mirror_url]
cached_index_hash = cache_entry['index_hash']
cached_index_path = cache_entry['index_path']
if cached_mirror_url in configured_mirror_urls:
# May need to fetch the index and update the local caches
needs_regen = self.fetch_and_cache_index(
cached_mirror_url, expect_hash=cached_index_hash)
# In this block, the need to regenerate implies a need to
# clear as well. This is the first place we set these to
# non-default values, so setting them False is fine. After
# this, we should never set False again, only True.
spec_cache_clear_needed = needs_regen
spec_cache_regenerate_needed = needs_regen
else:
# No longer have this mirror, cached index should be removed
items_to_remove.append({
'url': cached_mirror_url,
'cache_key': os.path.join(self._index_cache_root,
cached_index_path)
})
spec_cache_clear_needed = True
spec_cache_regenerate_needed = True
# Clean up items to be removed, identified above
for item in items_to_remove:
url = item['url']
cache_key = item['cache_key']
self._index_file_cache.remove(cache_key)
del self._local_index_cache[url]
# Iterate the configured mirrors now. Any mirror urls we do not
# already have in our cache must be fetched, stored, and represented
# locally.
for mirror_url in configured_mirror_urls:
if mirror_url not in self._local_index_cache:
# Need to fetch the index and update the local caches
needs_regen = self.fetch_and_cache_index(mirror_url)
# Generally speaking, a new mirror wouldn't imply the need to
# clear the spec cache, but don't touch it, which lets the
# previous decisions stand. Also, only change the need to
# regenerate possibly from False to True.
if needs_regen:
spec_cache_regenerate_needed = True
self._write_local_index_cache()
if spec_cache_regenerate_needed:
self.regenerate_spec_cache(clear_existing=spec_cache_clear_needed)
def fetch_and_cache_index(self, mirror_url, expect_hash=None):
""" Fetch a buildcache index file from a remote mirror and cache it.
If we already have a cached index from this mirror, then we first
check if the hash has changed, and we avoid fetching it if not.
Args:
mirror_url (str): Base url of mirror
expect_hash (str): If provided, this hash will be compared against
the index hash we retrieve from the mirror, to determine if we
need to fetch the index or not.
Returns:
True if this function thinks the concrete spec cache,
``_mirrors_for_spec``, should be regenerated. Returns False
otherwise.
"""
index_fetch_url = url_util.join(
mirror_url, _build_cache_relative_path, 'index.json')
hash_fetch_url = url_util.join(
mirror_url, _build_cache_relative_path, 'index.json.hash')
old_cache_key = None
fetched_hash = None
# Fetch the hash first so we can check if we actually need to fetch
# the index itself.
try:
_, _, fs = web_util.read_from_url(hash_fetch_url)
fetched_hash = codecs.getreader('utf-8')(fs).read()
except (URLError, web_util.SpackWebError) as url_err:
tty.debug('Unable to read index hash {0}'.format(
hash_fetch_url), url_err, 1)
# The only case where we'll skip attempting to fetch the buildcache
# index from the mirror is when we already have a hash for this
# mirror, we were able to retrieve one from the mirror, and
# the two hashes are the same.
if expect_hash and fetched_hash:
if fetched_hash == expect_hash:
tty.debug('Cached index for {0} already up to date'.format(
mirror_url))
return False
else:
# We expected a hash, we fetched a hash, and they were not the
# same. If we end up fetching an index successfully and
# replacing our entry for this mirror, we should clean up the
# existing cache file
if mirror_url in self._local_index_cache:
existing_entry = self._local_index_cache[mirror_url]
old_cache_key = existing_entry['index_path']
tty.debug('Fetching index from {0}'.format(index_fetch_url))
# Fetch index itself
try:
_, _, fs = web_util.read_from_url(index_fetch_url)
index_object_str = codecs.getreader('utf-8')(fs).read()
except (URLError, web_util.SpackWebError) as url_err:
tty.debug('Unable to read index {0}'.format(index_fetch_url),
url_err, 1)
# We failed to fetch the index, even though we decided it was
# necessary. However, regenerating the spec cache won't produce
# anything different than what it has already, so return False.
return False
locally_computed_hash = compute_hash(index_object_str)
if fetched_hash is not None and locally_computed_hash != fetched_hash:
msg_tmpl = ('Computed hash ({0}) did not match remote ({1}), '
'indicating error in index transmission')
tty.error(msg_tmpl.format(locally_computed_hash, expect_hash))
# We somehow got an index that doesn't match the remote one, maybe
# the next time we try we'll be successful. Regardless, we're not
# updating our index cache with this, so don't regenerate the spec
# cache either.
return False
url_hash = compute_hash(mirror_url)
cache_key = '{0}_{1}.json'.format(
url_hash[:10], locally_computed_hash[:10])
self._index_file_cache.init_entry(cache_key)
with self._index_file_cache.write_transaction(cache_key) as (old, new):
new.write(index_object_str)
self._local_index_cache[mirror_url] = {
'index_hash': locally_computed_hash,
'index_path': cache_key,
}
# clean up the old cache_key if necessary
if old_cache_key:
self._index_file_cache.remove(old_cache_key)
# We fetched an index and updated the local index cache, we should
# regenerate the spec cache as a result.
return True
def _binary_index():
"""Get the singleton store instance."""
cache_root = spack.config.get(
'config:binary_index_root', default_binary_index_root)
cache_root = spack.util.path.canonicalize_path(cache_root)
return BinaryCacheIndex(cache_root)
#: Singleton binary_index instance
binary_index = llnl.util.lang.Singleton(_binary_index)
class NoOverwriteException(spack.error.SpackError):
@ -119,6 +520,10 @@ def __init__(self, msg):
super(NewLayoutException, self).__init__(msg)
def compute_hash(data):
return hashlib.sha256(data.encode('utf-8')).hexdigest()
def build_cache_relative_path():
return _build_cache_relative_path
@ -324,11 +729,29 @@ def generate_package_index(cache_prefix):
with open(index_json_path, 'w') as f:
db._write_to_file(f)
# Read the index back in and compute it's hash
with open(index_json_path) as f:
index_string = f.read()
index_hash = compute_hash(index_string)
# Write the hash out to a local file
index_hash_path = os.path.join(db_root_dir, 'index.json.hash')
with open(index_hash_path, 'w') as f:
f.write(index_hash)
# Push the index itself
web_util.push_to_url(
index_json_path,
url_util.join(cache_prefix, 'index.json'),
keep_original=False,
extra_args={'ContentType': 'application/json'})
# Push the hash
web_util.push_to_url(
index_hash_path,
url_util.join(cache_prefix, 'index.json.hash'),
keep_original=False,
extra_args={'ContentType': 'text/plain'})
finally:
shutil.rmtree(tmpdir)
@ -488,13 +911,6 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
spec.prefix, spack.store.layout.root)
buildinfo['relative_rpaths'] = rel
spec_dict['buildinfo'] = buildinfo
spec_dict['full_hash'] = spec.full_hash()
tty.debug('The full_hash ({0}) of {1} will be written into {2}'.format(
spec_dict['full_hash'],
spec.name,
url_util.format(remote_specfile_path)))
tty.debug(spec.tree())
with open(specfile_path, 'w') as outfile:
outfile.write(syaml.dump(spec_dict))
@ -545,10 +961,20 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
return None
def download_tarball(spec):
def download_tarball(spec, preferred_mirrors=None):
"""
Download binary tarball for given package into stage area
Return True if successful
Download binary tarball for given package into stage area, returning
path to downloaded tarball if successful, None otherwise.
Args:
spec (Spec): Concrete spec
preferred_mirrors (list): If provided, this is a list of preferred
mirror urls. Other configured mirrors will only be used if the
tarball can't be retrieved from one of these.
Returns:
Path to the downloaded tarball, or ``None`` if the tarball could not
be downloaded from any configured mirrors.
"""
if not spack.mirror.MirrorCollection():
tty.die("Please add a spack mirror to allow " +
@ -556,12 +982,21 @@ def download_tarball(spec):
tarball = tarball_path_name(spec, '.spack')
for mirror in spack.mirror.MirrorCollection().values():
url = url_util.join(
mirror.fetch_url, _build_cache_relative_path, tarball)
urls_to_try = []
if preferred_mirrors:
for preferred_url in preferred_mirrors:
urls_to_try.append(url_util.join(
preferred_url, _build_cache_relative_path, tarball))
for mirror in spack.mirror.MirrorCollection().values():
if not preferred_mirrors or mirror.fetch_url not in preferred_mirrors:
urls_to_try.append(url_util.join(
mirror.fetch_url, _build_cache_relative_path, tarball))
for try_url in urls_to_try:
# stage the tarball into standard place
stage = Stage(url, name="build_cache", keep=True)
stage = Stage(try_url, name="build_cache", keep=True)
stage.create()
try:
stage.fetch()
@ -569,6 +1004,8 @@ def download_tarball(spec):
except fs.FetchError:
continue
tty.warn("download_tarball() was unable to download " +
"{0} from any configured mirrors".format(spec))
return None
@ -852,117 +1289,97 @@ def extract_tarball(spec, filename, allow_root=False, unsigned=False,
os.remove(filename)
# Internal cache for downloaded specs
_cached_specs = set()
def try_download_specs(urls=None, force=False):
'''
Try to download the urls and cache them
'''
global _cached_specs
if urls is None:
return {}
for link in urls:
with Stage(link, name="build_cache", keep=True) as stage:
if force and os.path.exists(stage.save_filename):
os.remove(stage.save_filename)
if not os.path.exists(stage.save_filename):
try:
stage.fetch()
except fs.FetchError:
continue
with open(stage.save_filename, 'r') as f:
# read the spec from the build cache file. All specs
# in build caches are concrete (as they are built) so
# we need to mark this spec concrete on read-in.
spec = Spec.from_yaml(f)
spec._mark_concrete()
_cached_specs.add(spec)
return _cached_specs
def get_spec(spec=None, force=False):
def try_direct_fetch(spec, force=False, full_hash_match=False):
"""
Check if spec.yaml exists on mirrors and return it if it does
Try to find the spec directly on the configured mirrors
"""
global _cached_specs
urls = set()
if spec is None:
return {}
specfile_name = tarball_name(spec, '.spec.yaml')
if not spack.mirror.MirrorCollection():
tty.debug("No Spack mirrors are currently configured")
return {}
if _cached_specs and spec in _cached_specs:
return _cached_specs
lenient = not full_hash_match
found_specs = []
for mirror in spack.mirror.MirrorCollection().values():
fetch_url_build_cache = url_util.join(
mirror.fetch_url, _build_cache_relative_path)
mirror_dir = url_util.local_file_path(fetch_url_build_cache)
if mirror_dir:
tty.debug('Finding buildcaches in {0}'.format(mirror_dir))
link = url_util.join(fetch_url_build_cache, specfile_name)
urls.add(link)
else:
tty.debug('Finding buildcaches at {0}'
.format(url_util.format(fetch_url_build_cache)))
link = url_util.join(fetch_url_build_cache, specfile_name)
urls.add(link)
return try_download_specs(urls=urls, force=force)
def get_specs():
"""
Get spec.yaml's for build caches available on mirror
"""
global _cached_specs
if not spack.mirror.MirrorCollection():
tty.debug("No Spack mirrors are currently configured")
return {}
for mirror in spack.mirror.MirrorCollection().values():
fetch_url_build_cache = url_util.join(
mirror.fetch_url, _build_cache_relative_path)
tty.debug('Finding buildcaches at {0}'
.format(url_util.format(fetch_url_build_cache)))
index_url = url_util.join(fetch_url_build_cache, 'index.json')
buildcache_fetch_url = url_util.join(
mirror.fetch_url, _build_cache_relative_path, specfile_name)
try:
_, _, file_stream = web_util.read_from_url(
index_url, 'application/json')
index_object = codecs.getreader('utf-8')(file_stream).read()
except (URLError, web_util.SpackWebError) as url_err:
tty.debug('Failed to read index {0}'.format(index_url), url_err, 1)
# Continue on to the next mirror
_, _, fs = web_util.read_from_url(buildcache_fetch_url)
fetched_spec_yaml = codecs.getreader('utf-8')(fs).read()
except (URLError, web_util.SpackWebError, HTTPError) as url_err:
tty.debug('Did not find {0} on {1}'.format(
specfile_name, buildcache_fetch_url), url_err)
continue
tmpdir = tempfile.mkdtemp()
index_file_path = os.path.join(tmpdir, 'index.json')
with open(index_file_path, 'w') as fd:
fd.write(index_object)
# read the spec from the build cache file. All specs in build caches
# are concrete (as they are built) so we need to mark this spec
# concrete on read-in.
fetched_spec = Spec.from_yaml(fetched_spec_yaml)
fetched_spec._mark_concrete()
db_root_dir = os.path.join(tmpdir, 'db_root')
db = spack_db.Database(None, db_dir=db_root_dir,
enable_transaction_locking=False)
# Do not recompute the full hash for the fetched spec, instead just
# read the property.
if lenient or fetched_spec._full_hash == spec.full_hash():
found_specs.append({
'mirror_url': mirror.fetch_url,
'spec': fetched_spec,
})
db._read_from_file(index_file_path)
spec_list = db.query_local(installed=False)
return found_specs
for indexed_spec in spec_list:
_cached_specs.add(indexed_spec)
return _cached_specs
def get_mirrors_for_spec(spec=None, force=False, full_hash_match=False):
"""
Check if concrete spec exists on mirrors and return a list
indicating the mirrors on which it can be found
"""
if spec is None:
return []
if not spack.mirror.MirrorCollection():
tty.debug("No Spack mirrors are currently configured")
return {}
results = []
lenient = not full_hash_match
spec_full_hash = spec.full_hash()
def filter_candidates(candidate_list):
filtered_candidates = []
for candidate in candidate_list:
candidate_full_hash = candidate['spec']._full_hash
if lenient or spec_full_hash == candidate_full_hash:
filtered_candidates.append(candidate)
return filtered_candidates
candidates = binary_index.find_built_spec(spec)
if candidates:
results = filter_candidates(candidates)
# Maybe we just didn't have the latest information from the mirror, so
# try to fetch directly.
if not results:
results = try_direct_fetch(spec,
force=force,
full_hash_match=full_hash_match)
if results:
binary_index.update_spec(spec, results)
return results
def update_cache_and_get_specs():
"""
Get all concrete specs for build caches available on configured mirrors.
Initialization of internal cache data structures is done as lazily as
possible, so this method will also attempt to initialize and update the
local index cache (essentially a no-op if it has been done already and
nothing has changed on the configured mirrors.)
"""
binary_index.update()
return binary_index.get_all_built_specs()
def clear_spec_cache():
binary_index.clear()
def get_keys(install=False, trust=False, force=False, mirrors=None):
@ -1131,23 +1548,38 @@ def needs_rebuild(spec, mirror_url, rebuild_on_errors=False):
spec_yaml = syaml.load(yaml_contents)
yaml_spec = spec_yaml['spec']
name = spec.name
# The "spec" key in the yaml is a list of objects, each with a single
# key that is the package name. While the list usually just contains
# a single object, we iterate over the list looking for the object
# with the name of this concrete spec as a key, out of an abundance
# of caution.
cached_pkg_specs = [item[name] for item in yaml_spec if name in item]
cached_target = cached_pkg_specs[0] if cached_pkg_specs else None
# If either the full_hash didn't exist in the .spec.yaml file, or it
# did, but didn't match the one we computed locally, then we should
# just rebuild. This can be simplified once the dag_hash and the
# full_hash become the same thing.
if ('full_hash' not in spec_yaml or
spec_yaml['full_hash'] != pkg_full_hash):
if 'full_hash' in spec_yaml:
rebuild = False
if not cached_target or 'full_hash' not in cached_target:
reason = 'full_hash was missing from remote spec.yaml'
rebuild = True
else:
full_hash = cached_target['full_hash']
if full_hash != pkg_full_hash:
reason = 'hash mismatch, remote = {0}, local = {1}'.format(
spec_yaml['full_hash'], pkg_full_hash)
else:
reason = 'full_hash was missing from remote spec.yaml'
full_hash, pkg_full_hash)
rebuild = True
if rebuild:
tty.msg('Rebuilding {0}, reason: {1}'.format(
spec.short_spec, reason))
tty.msg(spec.tree())
return True
return False
return rebuild
def check_specs_against_mirrors(mirrors, specs, output_file=None,

View file

@ -295,7 +295,7 @@ def match_downloaded_specs(pkgs, allow_multiple_matches=False, force=False,
specs_from_cli = []
has_errors = False
specs = bindist.get_specs()
specs = bindist.update_cache_and_get_specs()
if not other_arch:
arch = spack.architecture.default_arch().to_spec()
specs = [s for s in specs if s.satisfies(arch)]
@ -509,7 +509,7 @@ def install_tarball(spec, args):
def listspecs(args):
"""list binary packages available from mirrors"""
specs = bindist.get_specs()
specs = bindist.update_cache_and_get_specs()
if not args.allarch:
arch = spack.architecture.default_arch().to_spec()
specs = [s for s in specs if s.satisfies(arch)]

View file

@ -45,6 +45,7 @@ def update_kwargs_from_args(args, kwargs):
'explicit': True, # Always true for install command
'stop_at': args.until,
'unsigned': args.unsigned,
'full_hash_match': args.full_hash_match,
})
kwargs.update({
@ -107,6 +108,11 @@ def setup_parser(subparser):
'--no-check-signature', action='store_true',
dest='unsigned', default=False,
help="do not check signatures of binary packages")
subparser.add_argument(
'--require-full-hash-match', action='store_true',
dest='full_hash_match', default=False, help="""when installing from
binary mirrors, do not install binary package unless the full hash of the
remote spec matches that of the local spec""")
subparser.add_argument(
'--show-log-on-error', action='store_true',
help="print full build log to stderr if build fails")

View file

@ -209,7 +209,7 @@ def to_dict(self, include_fields=default_install_record_fields):
for field_name in include_fields:
if field_name == 'spec':
rec_dict.update({'spec': self.spec.to_node_dict()})
rec_dict.update({'spec': self.spec.node_dict_with_hashes()})
elif field_name == 'deprecated_for' and self.deprecated_for:
rec_dict.update({'deprecated_for': self.deprecated_for})
else:
@ -1112,6 +1112,7 @@ def _add(
# the original hash of concrete specs.
new_spec._mark_concrete()
new_spec._hash = key
new_spec._full_hash = spec._full_hash
else:
# If it is already there, mark it as installed and update

View file

@ -213,7 +213,8 @@ def _hms(seconds):
return ' '.join(parts)
def _install_from_cache(pkg, cache_only, explicit, unsigned=False):
def _install_from_cache(pkg, cache_only, explicit, unsigned=False,
full_hash_match=False):
"""
Extract the package from binary cache
@ -229,8 +230,8 @@ def _install_from_cache(pkg, cache_only, explicit, unsigned=False):
(bool) ``True`` if the package was extract from binary cache,
``False`` otherwise
"""
installed_from_cache = _try_install_from_binary_cache(pkg, explicit,
unsigned)
installed_from_cache = _try_install_from_binary_cache(
pkg, explicit, unsigned=unsigned, full_hash_match=full_hash_match)
pkg_id = package_id(pkg)
if not installed_from_cache:
pre = 'No binary for {0} found'.format(pkg_id)
@ -302,7 +303,8 @@ def _process_external_package(pkg, explicit):
spack.store.db.add(spec, None, explicit=explicit)
def _process_binary_cache_tarball(pkg, binary_spec, explicit, unsigned):
def _process_binary_cache_tarball(pkg, binary_spec, explicit, unsigned,
preferred_mirrors=None):
"""
Process the binary cache tarball.
@ -312,12 +314,15 @@ def _process_binary_cache_tarball(pkg, binary_spec, explicit, unsigned):
explicit (bool): the package was explicitly requested by the user
unsigned (bool): ``True`` if binary package signatures to be checked,
otherwise, ``False``
preferred_mirrors (list): Optional list of urls to prefer when
attempting to download the tarball
Return:
(bool) ``True`` if the package was extracted from binary cache,
else ``False``
"""
tarball = binary_distribution.download_tarball(binary_spec)
tarball = binary_distribution.download_tarball(
binary_spec, preferred_mirrors=preferred_mirrors)
# see #10063 : install from source if tarball doesn't exist
if tarball is None:
tty.msg('{0} exists in binary cache but with different hash'
@ -333,7 +338,8 @@ def _process_binary_cache_tarball(pkg, binary_spec, explicit, unsigned):
return True
def _try_install_from_binary_cache(pkg, explicit, unsigned=False):
def _try_install_from_binary_cache(pkg, explicit, unsigned=False,
full_hash_match=False):
"""
Try to extract the package from binary cache.
@ -345,13 +351,18 @@ def _try_install_from_binary_cache(pkg, explicit, unsigned=False):
"""
pkg_id = package_id(pkg)
tty.debug('Searching for binary cache of {0}'.format(pkg_id))
specs = binary_distribution.get_spec(pkg.spec, force=False)
binary_spec = spack.spec.Spec.from_dict(pkg.spec.to_dict())
binary_spec._mark_concrete()
if binary_spec not in specs:
matches = binary_distribution.get_mirrors_for_spec(
pkg.spec, force=False, full_hash_match=full_hash_match)
if not matches:
return False
return _process_binary_cache_tarball(pkg, binary_spec, explicit, unsigned)
# In the absence of guidance from user or some other reason to prefer one
# mirror over another, any match will suffice, so just pick the first one.
preferred_mirrors = [match['mirror_url'] for match in matches]
binary_spec = matches[0]['spec']
return _process_binary_cache_tarball(pkg, binary_spec, explicit, unsigned,
preferred_mirrors=preferred_mirrors)
def _update_explicit_entry_in_db(pkg, rec, explicit):
@ -1054,6 +1065,7 @@ def _install_task(self, task, **kwargs):
unsigned = kwargs.get('unsigned', False)
use_cache = kwargs.get('use_cache', True)
verbose = kwargs.get('verbose', False)
full_hash_match = kwargs.get('full_hash_match', False)
pkg = task.pkg
pkg_id = package_id(pkg)
@ -1065,7 +1077,8 @@ def _install_task(self, task, **kwargs):
# Use the binary cache if requested
if use_cache and \
_install_from_cache(pkg, cache_only, explicit, unsigned):
_install_from_cache(pkg, cache_only, explicit, unsigned,
full_hash_match):
self._update_installed(task)
if task.compiler:
spack.compilers.add_compilers_to_config(

View file

@ -26,7 +26,6 @@
'relative_rpaths': {'type': 'boolean'},
},
},
'full_hash': {'type': 'string'},
'spec': {
'type': 'array',
'items': spack.schema.spec.properties,

View file

@ -87,6 +87,7 @@
],
},
'allow_sgid': {'type': 'boolean'},
'binary_index_root': {'type': 'string'},
},
},
}

View file

@ -81,6 +81,7 @@
],
'properties': {
'hash': {'type': 'string'},
'full_hash': {'type': 'string'},
'version': {
'oneOf': [
{'type': 'string'},

View file

@ -1640,14 +1640,22 @@ def to_dict(self, hash=ht.dag_hash):
"""
node_list = []
for s in self.traverse(order='pre', deptype=hash.deptype):
node = s.to_node_dict(hash)
node[s.name]['hash'] = s.dag_hash()
if 'build' in hash.deptype:
node[s.name]['build_hash'] = s.build_hash()
node_list.append(node)
node_list.append(s.node_dict_with_hashes(hash))
return syaml.syaml_dict([('spec', node_list)])
def node_dict_with_hashes(self, hash=ht.dag_hash):
""" Returns a node_dict of this spec with the dag hash added. If this
spec is concrete, the full hash is added as well. If 'build' is in
the hash_type, the build hash is also added. """
node = self.to_node_dict(hash)
node[self.name]['hash'] = self.dag_hash()
if self.concrete:
node[self.name]['full_hash'] = self.full_hash()
if 'build' in hash.deptype:
node[self.name]['build_hash'] = self.build_hash()
return node
def to_record_dict(self):
"""Return a "flat" dictionary with name and hash as top-level keys.

View file

@ -19,6 +19,7 @@
import spack.cmd.install as install
import spack.cmd.uninstall as uninstall
import spack.cmd.mirror as mirror
from spack.main import SpackCommand
import spack.mirror
import spack.util.gpg
from spack.directory_layout import YamlDirectoryLayout
@ -31,6 +32,11 @@
mirror_path_def = None
mirror_path_rel = None
mirror_cmd = SpackCommand('mirror')
install_cmd = SpackCommand('install')
uninstall_cmd = SpackCommand('uninstall')
buildcache_cmd = SpackCommand('buildcache')
@pytest.fixture(scope='function')
def cache_directory(tmpdir):
@ -254,7 +260,7 @@ def test_default_rpaths_create_install_default_layout(tmpdir,
args = parser.parse_args(['list', '-l', '-v'])
buildcache.buildcache(parser, args)
bindist._cached_specs = set()
bindist.clear_spec_cache()
spack.stage.purge()
margs = mparser.parse_args(
['rm', '--scope', 'site', 'test-mirror-def'])
@ -302,7 +308,7 @@ def test_default_rpaths_install_nondefault_layout(tmpdir,
args = parser.parse_args(install_args)
buildcache.buildcache(parser, args)
bindist._cached_specs = set()
bindist.clear_spec_cache()
spack.stage.purge()
margs = mparser.parse_args(
['rm', '--scope', 'site', 'test-mirror-def'])
@ -358,7 +364,7 @@ def test_relative_rpaths_create_default_layout(tmpdir,
uargs = uparser.parse_args(['-y', '--dependents', gspec.name])
uninstall.uninstall(uparser, uargs)
bindist._cached_specs = set()
bindist.clear_spec_cache()
spack.stage.purge()
@ -395,7 +401,7 @@ def test_relative_rpaths_install_default_layout(tmpdir,
buildcache.setup_parser(parser)
# set default buildcache args
install_args = ['install', '-a', '-u',
install_args = ['install', '-a', '-u', '-f',
cspec.name]
# install buildcache created with relativized rpaths
@ -419,7 +425,7 @@ def test_relative_rpaths_install_default_layout(tmpdir,
args = parser.parse_args(install_args)
buildcache.buildcache(parser, args)
bindist._cached_specs = set()
bindist.clear_spec_cache()
spack.stage.purge()
margs = mparser.parse_args(
['rm', '--scope', 'site', 'test-mirror-rel'])
@ -460,13 +466,13 @@ def test_relative_rpaths_install_nondefault(tmpdir,
buildcache.setup_parser(parser)
# Set default buildcache args
install_args = ['install', '-a', '-u', '%s' % cspec.name]
install_args = ['install', '-a', '-u', '-f', '%s' % cspec.name]
# test install in non-default install path scheme and relative path
args = parser.parse_args(install_args)
buildcache.buildcache(parser, args)
bindist._cached_specs = set()
bindist.clear_spec_cache()
spack.stage.purge()
margs = mparser.parse_args(
['rm', '--scope', 'site', 'test-mirror-rel'])
@ -510,3 +516,101 @@ def test_push_and_fetch_keys(mock_gnupghome):
new_keys = spack.util.gpg.public_keys()
assert len(new_keys) == 1
assert new_keys[0] == fpr
@pytest.mark.requires_executables(*args)
@pytest.mark.disable_clean_stage_check
@pytest.mark.maybeslow
@pytest.mark.nomockstage
@pytest.mark.usefixtures('default_config', 'cache_directory',
'install_dir_non_default_layout')
def test_built_spec_cache(tmpdir,
install_mockery):
""" Because the buildcache list command fetches the buildcache index
and uses it to populate the binary_distribution built spec cache, when
this test calls get_mirrors_for_spec, it is testing the popluation of
that cache from a buildcache index. """
global mirror_path_rel
mparser = argparse.ArgumentParser()
mirror.setup_parser(mparser)
margs = mparser.parse_args(
['add', '--scope', 'site', 'test-mirror-rel', 'file://%s' % mirror_path_rel])
mirror.mirror(mparser, margs)
# setup argument parser
parser = argparse.ArgumentParser()
buildcache.setup_parser(parser)
list_args = ['list', '-a', '-l']
args = parser.parse_args(list_args)
buildcache.buildcache(parser, args)
gspec = Spec('garply')
gspec.concretize()
cspec = Spec('corge')
cspec.concretize()
full_hash_map = {
'garply': gspec.full_hash(),
'corge': cspec.full_hash(),
}
gspec_results = bindist.get_mirrors_for_spec(gspec)
gspec_mirrors = {}
for result in gspec_results:
s = result['spec']
assert(s._full_hash == full_hash_map[s.name])
assert(result['mirror_url'] not in gspec_mirrors)
gspec_mirrors[result['mirror_url']] = True
cspec_results = bindist.get_mirrors_for_spec(cspec, full_hash_match=True)
cspec_mirrors = {}
for result in cspec_results:
s = result['spec']
assert(s._full_hash == full_hash_map[s.name])
assert(result['mirror_url'] not in cspec_mirrors)
cspec_mirrors[result['mirror_url']] = True
bindist.clear_spec_cache()
margs = mparser.parse_args(
['rm', '--scope', 'site', 'test-mirror-rel'])
mirror.mirror(mparser, margs)
def test_spec_needs_rebuild(install_mockery_mutable_config, mock_packages,
mock_fetch, monkeypatch, tmpdir):
"""Make sure needs_rebuild properly compares remote full_hash
against locally computed one, avoiding unnecessary rebuilds"""
# Create a temp mirror directory for buildcache usage
mirror_dir = tmpdir.join('mirror_dir')
mirror_url = 'file://{0}'.format(mirror_dir.strpath)
mirror_cmd('add', 'test-mirror', mirror_url)
s = Spec('libdwarf').concretized()
# Install a package
install_cmd(s.name)
# Put installed package in the buildcache
buildcache_cmd('create', '-u', '-a', '-d', mirror_dir.strpath, s.name)
rebuild = bindist.needs_rebuild(s, mirror_url, rebuild_on_errors=True)
assert not rebuild
# Now monkey patch Spec to change the full hash on the package
def fake_full_hash(spec):
print('fake_full_hash')
return 'tal4c7h4z0gqmixb1eqa92mjoybxn5l6'
monkeypatch.setattr(spack.spec.Spec, 'full_hash', fake_full_hash)
rebuild = bindist.needs_rebuild(s, mirror_url, rebuild_on_errors=True)
assert rebuild

View file

@ -28,7 +28,7 @@
def mock_get_specs(database, monkeypatch):
specs = database.query_local()
monkeypatch.setattr(
spack.binary_distribution, 'get_specs', lambda: specs
spack.binary_distribution, 'update_cache_and_get_specs', lambda: specs
)
@ -43,7 +43,7 @@ def mock_get_specs_multiarch(database, monkeypatch):
break
monkeypatch.setattr(
spack.binary_distribution, 'get_specs', lambda: specs
spack.binary_distribution, 'update_cache_and_get_specs', lambda: specs
)

View file

@ -119,7 +119,11 @@ def test_ci_generate_with_env(tmpdir, mutable_mock_env_path, env_deactivate,
- old-gcc-pkgs:
- archive-files
- callpath
- hypre@0.2.15
# specify ^openblas-with-lapack to ensure that builtin.mock repo flake8
# package (which can also provide lapack) is not chosen, as it violates
# a package-level check which requires exactly one fetch strategy (this
# is apparently not an issue for other tests that use it).
- hypre@0.2.15 ^openblas-with-lapack
specs:
- matrix:
- [$old-gcc-pkgs]

View file

@ -809,3 +809,61 @@ def test_install_fails_no_args_suggests_env_activation(tmpdir):
assert 'requires a package argument or active environment' in output
assert 'spack env activate .' in output
assert 'using the `spack.yaml` in this directory' in output
def test_cache_install_full_hash_match(
install_mockery_mutable_config, mock_packages, mock_fetch,
mock_archive, mutable_config, monkeypatch, tmpdir):
"""Make sure installing from cache respects full hash argument"""
# Create a temp mirror directory for buildcache usage
mirror_dir = tmpdir.join('mirror_dir')
mirror_url = 'file://{0}'.format(mirror_dir.strpath)
s = Spec('libdwarf').concretized()
# Install a package
install(s.name)
# Put installed package in the buildcache
buildcache('create', '-u', '-a', '-f', '-d', mirror_dir.strpath, s.name)
# Now uninstall the package
uninstall('-y', s.name)
# Configure the mirror with the binary package in it
mirror('add', 'test-mirror', mirror_url)
# Make sure we get the binary version by default
install_output = install('--no-check-signature', s.name, output=str)
expect_msg = 'Extracting {0} from binary cache'.format(s.name)
assert expect_msg in install_output
uninstall('-y', s.name)
# Now monkey patch Spec to change the full hash on the package
def fake_full_hash(spec):
print('fake_full_hash')
return 'tal4c7h4z0gqmixb1eqa92mjoybxn5l6'
monkeypatch.setattr(spack.spec.Spec, 'full_hash', fake_full_hash)
# Check that even if the full hash changes, we install from binary when
# we don't explicitly require the full hash to match
install_output = install('--no-check-signature', s.name, output=str)
expect_msg = 'Extracting {0} from binary cache'.format(s.name)
assert expect_msg in install_output
uninstall('-y', s.name)
# Finally, make sure that if we insist on the full hash match, spack
# installs from source.
install_output = install('--require-full-hash-match', s.name, output=str)
expect_msg = 'No binary for {0} found: installing from source'.format(
s.name)
assert expect_msg in install_output
uninstall('-y', s.name)
mirror('rm', 'test-mirror')

View file

@ -177,7 +177,7 @@ def test_process_binary_cache_tarball_none(install_mockery, monkeypatch,
def test_process_binary_cache_tarball_tar(install_mockery, monkeypatch, capfd):
"""Tests of _process_binary_cache_tarball with a tar file."""
def _spec(spec):
def _spec(spec, preferred_mirrors=None):
return spec
# Skip binary distribution functionality since assume tested elsewhere
@ -196,14 +196,18 @@ def _spec(spec):
def test_try_install_from_binary_cache(install_mockery, mock_packages,
monkeypatch, capsys):
"""Tests SystemExit path for_try_install_from_binary_cache."""
def _spec(spec, force):
def _mirrors_for_spec(spec, force, full_hash_match=False):
spec = spack.spec.Spec('mpi').concretized()
return {spec: None}
return [{
'mirror_url': 'notused',
'spec': spec,
}]
spec = spack.spec.Spec('mpich')
spec.concretize()
monkeypatch.setattr(spack.binary_distribution, 'get_spec', _spec)
monkeypatch.setattr(
spack.binary_distribution, 'get_mirrors_for_spec', _mirrors_for_spec)
with pytest.raises(SystemExit):
inst._try_install_from_binary_cache(spec.package, False, False)

View file

@ -351,11 +351,12 @@ def test_multiple_specs_with_hash(self, database):
@pytest.mark.db
def test_ambiguous_hash(self, mutable_database):
x1 = Spec('a')
x1.concretize()
x1._hash = 'xy'
x1._concrete = True
x2 = Spec('a')
x2.concretize()
x2._hash = 'xx'
x2._concrete = True
mutable_database.add(x1, spack.store.layout)
mutable_database.add(x2, spack.store.layout)

View file

@ -67,6 +67,11 @@ def possible_dependencies(
return visited
def content_hash(self):
# Unlike real packages, MockPackage doesn't have a corresponding
# package.py file; in that sense, the content_hash is always the same.
return self.__class__.__name__
class MockPackageMultiRepo(object):
"""Mock package repository, mimicking ``spack.repo.Repo``."""

View file

@ -1020,7 +1020,7 @@ _spack_info() {
_spack_install() {
if $list_options
then
SPACK_COMPREPLY="-h --help --only -u --until -j --jobs --overwrite --fail-fast --keep-prefix --keep-stage --dont-restage --use-cache --no-cache --cache-only --no-check-signature --show-log-on-error --source -n --no-checksum -v --verbose --fake --only-concrete -f --file --clean --dirty --test --run-tests --log-format --log-file --help-cdash -y --yes-to-all --cdash-upload-url --cdash-build --cdash-site --cdash-track --cdash-buildstamp"
SPACK_COMPREPLY="-h --help --only -u --until -j --jobs --overwrite --fail-fast --keep-prefix --keep-stage --dont-restage --use-cache --no-cache --cache-only --no-check-signature --require-full-hash-match --show-log-on-error --source -n --no-checksum -v --verbose --fake --only-concrete -f --file --clean --dirty --test --run-tests --log-format --log-file --help-cdash -y --yes-to-all --cdash-upload-url --cdash-build --cdash-site --cdash-track --cdash-buildstamp"
else
_all_packages
fi