hashes: consolidate and make hashing logic more consistent

Spack has evolved to have three types of hash functions, and it's
becoming hard to tell when each one is called. While we aren't yet ready
to get rid of them, we can refactor them so that the code is clearer and
easier to track.

- Add a `hash_types` module with concise descriptors for hashes.

- Consolidate hashing logic in a private `Spec._spec_hash()` function.

- `dag_hash()`, `build_hash()`, and `full_hash()` all call `_spec_hash()`.

- `to_node_dict()`, `to_dict()`, `to_yaml()`, and `to_json()` now take a
  `hash` parameter consistent with the one that `_spec_hash()` requires
  (see the usage sketch below).

Co-authored-by: Todd Gamblin <tgamblin@llnl.gov>
Peter Josef Scheibel authored on 2019-07-12 19:06:17 -07:00; committed by Greg Becker
parent 0715b512a1
commit 7ec1d320a8
8 changed files with 252 additions and 82 deletions
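
In practice the refactor reduces to one pattern: pick a hash
descriptor, then hash or serialize with it. A minimal sketch of the new
surface (not part of the diff; assumes an importable Spack checkout and
a concretizable package such as zlib):

    import spack.hash_types as ht
    from spack.spec import Spec

    spec = Spec('zlib').concretized()

    # All three public hashes funnel through the private Spec._spec_hash():
    print(spec.dag_hash())    # link/run deps only; identifies installations
    print(spec.build_hash())  # adds build deps; used by environments
    print(spec.full_hash())   # adds the package.py content hash

    # Serialization takes the same descriptor that _spec_hash() expects:
    yaml_with_build_deps = spec.to_yaml(hash=ht.build_hash)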

lib/spack/spack/cmd/buildcache.py

@@ -12,15 +12,16 @@
import spack.cmd
import spack.cmd.common.arguments as arguments
import spack.environment as ev
import spack.hash_types as ht
import spack.relocate
import spack.repo
import spack.spec
import spack.store
from spack.error import SpecError
import spack.config
import spack.repo
import spack.store
from spack.error import SpecError
from spack.paths import etc_path
from spack.spec import Spec, save_dependency_spec_yamls
from spack.spec_set import CombinatorialSpecSet
@@ -543,7 +544,7 @@ def save_spec_yamls(args):
root_spec = Spec(args.root_spec)
root_spec.concretize()
root_spec_as_yaml = root_spec.to_yaml(all_deps=True)
root_spec_as_yaml = root_spec.to_yaml(hash=ht.build_hash)
save_dependency_spec_yamls(
root_spec_as_yaml, args.yaml_dir, args.specs.split())
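
The call-site change above is the general recipe for updating callers:
the old all_deps boolean maps onto a descriptor. A sketch, assuming a
concrete root_spec:

    import spack.hash_types as ht

    # old: root_spec.to_yaml(all_deps=True)
    root_spec_as_yaml = root_spec.to_yaml(hash=ht.build_hash)

    # old: root_spec.to_yaml()  (all_deps=False)
    root_spec_as_yaml = root_spec.to_yaml()  # defaults to ht.dag_hash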

lib/spack/spack/environment.py

@@ -20,11 +20,13 @@
from llnl.util.tty.color import colorize
import spack.error
import spack.hash_types as ht
import spack.repo
import spack.schema.env
import spack.spec
import spack.util.spack_json as sjson
import spack.config
from spack.filesystem_view import YamlFilesystemView
from spack.util.environment import EnvironmentModifications
import spack.architecture as architecture
@@ -884,7 +886,7 @@ def install(self, user_spec, concrete_spec=None, **install_args):
# spec might be in the user_specs, but not installed.
# TODO: Redo name-based comparison for old style envs
spec = next(s for s in self.user_specs if s.satisfies(user_spec))
concrete = self.specs_by_hash.get(spec.dag_hash(all_deps=True))
concrete = self.specs_by_hash.get(spec.build_hash())
if not concrete:
concrete = spec.concretized()
self._add_concrete_spec(spec, concrete)
@@ -996,7 +998,7 @@ def _add_concrete_spec(self, spec, concrete, new=True):
# update internal lists of specs
self.concretized_user_specs.append(spec)
h = concrete.dag_hash(all_deps=True)
h = concrete.build_hash()
self.concretized_order.append(h)
self.specs_by_hash[h] = concrete
@@ -1111,9 +1113,9 @@ def _to_lockfile_dict(self):
concrete_specs = {}
for spec in self.specs_by_hash.values():
for s in spec.traverse():
dag_hash_all = s.dag_hash(all_deps=True)
dag_hash_all = s.build_hash()
if dag_hash_all not in concrete_specs:
spec_dict = s.to_node_dict(all_deps=True)
spec_dict = s.to_node_dict(hash=ht.build_hash)
spec_dict[s.name]['hash'] = s.dag_hash()
concrete_specs[dag_hash_all] = spec_dict
@@ -1172,7 +1174,7 @@ def _read_lockfile_dict(self, d):
self.specs_by_hash = {}
for _, spec in specs_by_hash.items():
dag_hash = spec.dag_hash()
build_hash = spec.dag_hash(all_deps=True)
build_hash = spec.build_hash()
if dag_hash in root_hashes:
old_hash_to_new[dag_hash] = build_hash
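
The net effect on the lockfile: entries in concrete_specs are keyed by
build_hash(), while each node dict still records the plain dag_hash()
under 'hash'. Shape only, with made-up hash strings:

    concrete_specs = {
        # key: build_hash() of the node (includes build dependencies)
        'mf2lyzfhnxsss4lba2jbbmtwmcvkbzvq': {
            'libelf': {
                'version': '0.8.13',
                # ... arch, compiler, parameters, dependencies ...
                'hash': 'zvaa4lhlhilypw5quj3akyd3apbq5gap',  # dag_hash()
            }
        },
    }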

lib/spack/spack/hash_types.py

@@ -0,0 +1,36 @@
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""Definitions that control how Spack creates Spec hashes."""
import spack.dependency as dp
class SpecHashDescriptor(object):
"""This class defines how hashes are generated on Spec objects.
Spec hashes in Spack are generated from a serialized (e.g., with
YAML) representation of the Spec graph. The representation may only
include certain dependency types, and it may optionally include a
canonicalized hash of the package.py for each node in the graph.
We currently use different hashes for different use cases.
"""
def __init__(self, deptype=('link', 'run'), package_hash=False):
self.deptype = dp.canonical_deptype(deptype)
self.package_hash = package_hash
#: Default hash descriptor, used by Spec.dag_hash() and stored in the DB.
dag_hash = SpecHashDescriptor(deptype=('link', 'run'), package_hash=False)
#: Hash descriptor that includes build dependencies.
build_hash = SpecHashDescriptor(
deptype=('build', 'link', 'run'), package_hash=False)
#: Full hash used in build pipelines to determine when to rebuild packages.
full_hash = SpecHashDescriptor(deptype=('link', 'run'), package_hash=True)
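
Since descriptors are plain value objects, a new hash variant is just
another instance. A sketch (everything_hash is hypothetical, not part
of this commit):

    import spack.dependency as dp
    import spack.hash_types as ht

    ht.build_hash.deptype      # ('build', 'link', 'run'), canonicalized
    ht.full_hash.package_hash  # True

    # hypothetical: hash over every dep type plus package contents
    everything_hash = ht.SpecHashDescriptor(
        deptype=dp.all_deptypes, package_hash=True)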

lib/spack/spack/spec.py

@@ -100,14 +100,15 @@
import spack.architecture
import spack.compiler
import spack.compilers as compilers
import spack.dependency as dp
import spack.error
import spack.hash_types as ht
import spack.parse
import spack.repo
import spack.store
import spack.util.spack_json as sjson
import spack.util.spack_yaml as syaml
from spack.dependency import Dependency, all_deptypes, canonical_deptype
from spack.util.module_cmd import get_path_from_module, load_module
from spack.error import NoLibrariesError, NoHeadersError
from spack.error import SpecError, UnsatisfiableSpecError
@@ -966,7 +967,7 @@ def get_dependency(self, name):
self.name + " does not depend on " + comma_or(name))
def _find_deps(self, where, deptype):
deptype = canonical_deptype(deptype)
deptype = dp.canonical_deptype(deptype)
return [dep for dep in where.values()
if deptype and (not dep.deptypes or
@@ -1192,7 +1193,7 @@ def traverse_edges(self, visited=None, d=0, deptype='all',
cover = kwargs.get('cover', 'nodes')
direction = kwargs.get('direction', 'children')
order = kwargs.get('order', 'pre')
deptype = canonical_deptype(deptype)
deptype = dp.canonical_deptype(deptype)
# Make sure kwargs have legal values; raise ValueError if not.
def validate(name, val, allowed_values):
@@ -1286,60 +1287,129 @@ def prefix(self):
def prefix(self, value):
self._prefix = Prefix(value)
def dag_hash(self, length=None, all_deps=False):
"""Return a hash of the entire spec DAG, including connectivity."""
if not self.concrete:
h = self._dag_hash(all_deps=all_deps)
# An upper bound of None is equivalent to len(h). An upper bound of
# 0 produces the empty string
return h[:length]
def _spec_hash(self, hash):
"""Utility method for computing different types of Spec hashes.
if not self._hash:
self._hash = self._dag_hash(all_deps=False)
if not self._build_hash:
self._build_hash = self._dag_hash(all_deps=True)
h = self._build_hash if all_deps else self._hash
return h[:length]
def _dag_hash(self, all_deps=False):
yaml_text = syaml.dump(
self.to_node_dict(all_deps=all_deps),
default_flow_style=True,
width=maxint)
Arguments:
hash (SpecHashDescriptor): type of hash to generate.
"""
# TODO: currently we strip build dependencies by default. Rethink
# this when we move to using package hashing on all specs.
yaml_text = syaml.dump(self.to_node_dict(hash=hash),
default_flow_style=True, width=maxint)
sha = hashlib.sha1(yaml_text.encode('utf-8'))
b32_hash = base64.b32encode(sha.digest()).lower()
if sys.version_info[0] >= 3:
b32_hash = b32_hash.decode('utf-8')
return b32_hash
def _cached_hash(self, length, attr, hash):
"""Helper function for storing a cached hash on the spec.
This will run ``_spec_hash()`` with the given hash descriptor (which
carries the deptype and package_hash settings), and if this spec is
concrete, it will cache the result in the attribute named by ``attr``.
Arguments:
length (int): length of the hash prefix to return (None for all).
attr (str): name of the spec attribute in which to cache the hash.
hash (SpecHashDescriptor): type of hash to generate.
"""
hash_string = getattr(self, attr, None)
if hash_string:
return hash_string[:length]
else:
hash_string = self._spec_hash(hash)
if self.concrete:
setattr(self, attr, hash_string)
return hash_string[:length]
def dag_hash(self, length=None):
"""This is Spack's default hash, used to identify installations.
At the moment, it excludes build dependencies to avoid rebuilding
packages whenever build dependency versions change. We will
revise this to include more detailed provenance when the
concretizer can more aggressively reuse installed dependencies.
"""
return self._cached_hash(length, '_hash', ht.dag_hash)
def build_hash(self, length=None):
"""Hash used to store specs in environments.
This hash includes build dependencies, and we need to preserve
them to be able to rebuild an entire environment for a user.
"""
return self._cached_hash(length, '_build_hash', ht.build_hash)
def full_hash(self, length=None):
"""Hash to determine when to rebuild packages in the build pipeline.
This hash includes the package hash, so that we know when package
files have changed between builds. It does not currently include
build dependencies, though it likely should.
TODO: investigate whether to include build deps here.
"""
return self._cached_hash(length, '_full_hash', ht.full_hash)
def dag_hash_bit_prefix(self, bits):
"""Get the first <bits> bits of the DAG hash as an integer type."""
return base32_prefix_bits(self.dag_hash(), bits)
def full_hash(self, length=None):
if not self.concrete:
raise SpecError("Spec is not concrete: " + str(self))
def to_node_dict(self, hash=ht.dag_hash):
"""Create a dictionary representing the state of this Spec.
if not self._full_hash:
yaml_text = syaml.dump(
self.to_node_dict(hash_function=lambda s: s.full_hash()),
default_flow_style=True, width=maxint)
package_hash = self.package.content_hash()
sha = hashlib.sha1(yaml_text.encode('utf-8') + package_hash)
``to_node_dict`` creates the content that is eventually hashed by
Spack to create identifiers like the DAG hash (see
``dag_hash()``). Example result of ``to_node_dict`` for the
``sqlite`` package::
b32_hash = base64.b32encode(sha.digest()).lower()
if sys.version_info[0] >= 3:
b32_hash = b32_hash.decode('utf-8')
{
'sqlite': {
'version': '3.28.0',
'arch': {
'platform': 'darwin',
'platform_os': 'mojave',
'target': 'x86_64',
},
'compiler': {
'name': 'clang',
'version': '10.0.0-apple',
},
'namespace': 'builtin',
'parameters': {
'fts': 'true',
'functions': 'false',
'cflags': [],
'cppflags': [],
'cxxflags': [],
'fflags': [],
'ldflags': [],
'ldlibs': [],
},
'dependencies': {
'readline': {
'hash': 'zvaa4lhlhilypw5quj3akyd3apbq5gap',
'type': ['build', 'link'],
}
},
}
}
self._full_hash = b32_hash
Note that the dictionary returned does *not* include the hash of
the *root* of the spec, though it does include hashes for each
dependency, and (optionally) a hash of the package file for each
node.
return self._full_hash[:length]
See ``to_dict()`` for a "complete" spec representation, which has a
full node dict for each dependency instead of just its hash.
def to_node_dict(self, hash_function=None, all_deps=False):
Arguments:
hash (SpecHashDescriptor): type of hash to generate.
"""
d = syaml_dict()
if self.versions:
@@ -1378,47 +1448,102 @@ def to_node_dict(self, hash_function=None, all_deps=False):
if hasattr(variant, '_patches_in_order_of_appearance'):
d['patches'] = variant._patches_in_order_of_appearance
# TODO: restore build dependencies here once we have less picky
# TODO: concretization.
if all_deps:
deptypes = ('link', 'run', 'build')
else:
deptypes = ('link', 'run')
deps = self.dependencies_dict(deptype=deptypes)
if hash.package_hash:
d['package_hash'] = self.package.content_hash()
deps = self.dependencies_dict(deptype=hash.deptype)
if deps:
if hash_function is None:
hash_function = lambda s: s.dag_hash(all_deps=all_deps)
d['dependencies'] = syaml_dict([
(name,
syaml_dict([
('hash', hash_function(dspec.spec)),
('hash', dspec.spec._spec_hash(hash)),
('type', sorted(str(s) for s in dspec.deptypes))])
) for name, dspec in sorted(deps.items())
])
return syaml_dict([(self.name, d)])
def to_dict(self, all_deps=False):
if all_deps:
deptypes = ('link', 'run', 'build')
else:
deptypes = ('link', 'run')
def to_dict(self, hash=ht.dag_hash):
"""Create a dictionary suitable for writing this spec to YAML or JSON.
This dictionary is like the one that is ultimately written to a
``spec.yaml`` file in each Spack installation directory. For
example, for sqlite::
{
'spec': [
{
'sqlite': {
'version': '3.28.0',
'arch': {
'platform': 'darwin',
'platform_os': 'mojave',
'target': 'x86_64',
},
'compiler': {
'name': 'clang',
'version': '10.0.0-apple',
},
'namespace': 'builtin',
'parameters': {
'fts': 'true',
'functions': 'false',
'cflags': [],
'cppflags': [],
'cxxflags': [],
'fflags': [],
'ldflags': [],
'ldlibs': [],
},
'dependencies': {
'readline': {
'hash': 'zvaa4lhlhilypw5quj3akyd3apbq5gap',
'type': ['build', 'link'],
}
},
'hash': '722dzmgymxyxd6ovjvh4742kcetkqtfs'
}
},
# ... more node dicts for readline and its dependencies ...
]
}
Note that this dictionary starts with the 'spec' key, and what
follows is a list starting with the root spec, followed by its
dependencies in preorder. Each node in the list also has a
'hash' key that contains the hash of the node *without* the hash
field included.
In the example, the package content hash is not included in the
spec, but if ``package_hash`` were true there would be an
additional field on each node called ``package_hash``.
``from_dict()`` can be used to read back a spec that has been
converted to a dictionary and serialized.
Arguments:
hash (SpecHashDescriptor): type of hash to generate.
"""
node_list = []
for s in self.traverse(order='pre', deptype=deptypes):
node = s.to_node_dict(all_deps=all_deps)
for s in self.traverse(order='pre', deptype=hash.deptype):
node = s.to_node_dict(hash)
node[s.name]['hash'] = s.dag_hash()
if all_deps:
node[s.name]['build_hash'] = s.dag_hash(all_deps=True)
if 'build' in hash.deptype:
node[s.name]['build_hash'] = s.build_hash()
node_list.append(node)
return syaml_dict([('spec', node_list)])
def to_yaml(self, stream=None, all_deps=False):
def to_yaml(self, stream=None, hash=ht.dag_hash):
return syaml.dump(
self.to_dict(all_deps), stream=stream, default_flow_style=False)
self.to_dict(hash), stream=stream, default_flow_style=False)
def to_json(self, stream=None):
return sjson.dump(self.to_dict(), stream)
def to_json(self, stream=None, hash=ht.dag_hash):
return sjson.dump(self.to_dict(hash), stream)
@staticmethod
def from_node_dict(node):
@@ -2135,7 +2260,7 @@ def _evaluate_dependency_conditions(self, name):
for when_spec, dependency in conditions.items():
if self.satisfies(when_spec, strict=True):
if dep is None:
dep = Dependency(self.name, Spec(name), type=())
dep = dp.Dependency(self.name, Spec(name), type=())
try:
dep.merge(dependency)
except UnsatisfiableSpecError as e:
@@ -2814,13 +2939,13 @@ def _dup(self, other, deps=True, cleardeps=True, caches=None):
# If we preserved the original structure, we can copy them
# safely. If not, they need to be recomputed.
if caches is None:
caches = (deps is True or deps == all_deptypes)
caches = (deps is True or deps == dp.all_deptypes)
# If we copy dependencies, preserve DAG structure in the new spec
if deps:
# If caller restricted deptypes to be copied, adjust that here.
# By default, just copy all deptypes
deptypes = all_deptypes
deptypes = dp.all_deptypes
if isinstance(deps, (tuple, list)):
deptypes = deps
self._dup_deps(other, deptypes, caches)
@@ -3621,7 +3746,7 @@ def tree(self, **kwargs):
types = set(dep_spec.deptypes)
out += '['
for t in all_deptypes:
for t in dp.all_deptypes:
out += ''.join(t[0] if t in types else ' ')
out += '] '
@@ -3980,7 +4105,7 @@ def save_dependency_spec_yamls(
yaml_path = os.path.join(output_directory, '{0}.yaml'.format(dep_name))
with open(yaml_path, 'w') as fd:
fd.write(dep_spec.to_yaml(all_deps=True))
fd.write(dep_spec.to_yaml(hash=ht.build_hash))
def base32_prefix_bits(hash_string, bits):
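
Two properties of the consolidated code are worth calling out:
_cached_hash() only memoizes hashes on concrete specs, and the dict
form round-trips. A sketch, again assuming a concretizable zlib:

    from spack.spec import Spec

    spec = Spec('zlib')
    spec.dag_hash()        # computed, but not cached: spec isn't concrete

    spec.concretize()
    h1 = spec.dag_hash(7)  # cached in spec._hash (possibly during concretize)
    h2 = spec.dag_hash(7)  # served from the cached attribute
    assert h1 == h2

    # Round-trip through the dict form used for spec.yaml:
    clone = Spec.from_dict(spec.to_dict())
    assert clone.dag_hash() == spec.dag_hash()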

lib/spack/spack/test/cmd/env.py

@@ -10,8 +10,10 @@
import llnl.util.filesystem as fs
import spack.hash_types as ht
import spack.modules
import spack.environment as ev
from spack.cmd.env import _env_create
from spack.spec import Spec
from spack.main import SpackCommand
@@ -643,7 +645,8 @@ def create_v1_lockfile_dict(roots, all_specs):
# Version one lockfiles use the dag hash without build deps as keys,
# but they write out the full node dict (including build deps)
"concrete_specs": dict(
(s.dag_hash(), s.to_node_dict(all_deps=True)) for s in all_specs
(s.dag_hash(), s.to_node_dict(hash=ht.build_hash))
for s in all_specs
)
}
return test_lockfile_dict
@@ -676,8 +679,8 @@ def test_read_old_lock_and_write_new(tmpdir):
# When the lockfile is rewritten, it should adopt the new hash scheme
# which accounts for all dependencies, including build dependencies
assert hashes == set([
x.dag_hash(all_deps=True),
y.dag_hash(all_deps=True)])
x.build_hash(),
y.build_hash()])
@pytest.mark.usefixtures('config')

lib/spack/spack/test/cmd/install.py

@@ -15,6 +15,7 @@
import llnl.util.filesystem as fs
import spack.config
import spack.hash_types as ht
import spack.package
import spack.cmd.install
from spack.error import SpackError
@@ -540,7 +541,7 @@ def test_cdash_install_from_spec_yaml(tmpdir, mock_fetch, install_mockery,
pkg_spec.concretize()
with open(spec_yaml_path, 'w') as fd:
fd.write(pkg_spec.to_yaml(all_deps=True))
fd.write(pkg_spec.to_yaml(hash=ht.build_hash))
install(
'--log-format=cdash',

lib/spack/spack/test/concretize.py

@@ -114,7 +114,7 @@ def test_installed_deps():
c_spec.concretize()
assert c_spec['d'].version == spack.version.Version('2')
c_installed = spack.spec.Spec.from_dict(c_spec.to_dict(all_deps=False))
c_installed = spack.spec.Spec.from_dict(c_spec.to_dict())
for spec in c_installed.traverse():
setattr(spec.package, 'installed', True)

lib/spack/spack/test/spec_yaml.py

@@ -12,8 +12,10 @@
from collections import Iterable, Mapping
import spack.hash_types as ht
import spack.util.spack_json as sjson
import spack.util.spack_yaml as syaml
from spack import repo
from spack.spec import Spec, save_dependency_spec_yamls
from spack.util.spack_yaml import syaml_dict
@@ -231,7 +233,7 @@ def test_save_dependency_spec_yamls_subset(tmpdir, config):
spec_a.concretize()
b_spec = spec_a['b']
c_spec = spec_a['c']
spec_a_yaml = spec_a.to_yaml(all_deps=True)
spec_a_yaml = spec_a.to_yaml(hash=ht.build_hash)
save_dependency_spec_yamls(spec_a_yaml, output_path, ['b', 'c'])