Hashing: force hash consistency for values read from config (#18446)

The 'external_modules' attribute on a Spec, when read from a YAML
configuration file, may contain extra formatting that is lost when
that Spec is written-to/read-from JSON format. This was resulting in
a hashing instability (when the Spec was read back, it would report a
different hash). This commit adds a function which removes the extra
formatting from 'external_modules' as it is passed to the Spec in
__init__ to ensure a consistent hash.
This commit is contained in:
Massimiliano Culpo 2020-09-03 19:49:36 +02:00 committed by GitHub
parent 741bb9bafe
commit fab2622a71
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 56 additions and 3 deletions

View file

@ -1008,7 +1008,7 @@ def __init__(self, spec_like=None,
self._normal = normal
self._concrete = concrete
self.external_path = external_path
self.external_modules = external_modules
self.external_modules = Spec._format_module_list(external_modules)
self._full_hash = full_hash
# This attribute is used to store custom information for
@ -1025,6 +1025,24 @@ def __init__(self, spec_like=None,
elif spec_like is not None:
raise TypeError("Can't make spec out of %s" % type(spec_like))
@staticmethod
def _format_module_list(modules):
"""Return a module list that is suitable for YAML serialization
and hash computation.
Given a module list, possibly read from a configuration file,
return an object that serializes to a consistent YAML string
before/after round-trip serialization to/from a Spec dictionary
(stored in JSON format): when read in, the module list may
contain YAML formatting that is discarded (non-essential)
when stored as a Spec dictionary; we take care in this function
to discard such formatting such that the Spec hash does not
change before/after storage in JSON.
"""
if modules:
modules = list(modules)
return modules
@property
def external(self):
return bool(self.external_path) or bool(self.external_modules)
@ -1383,8 +1401,8 @@ def _spec_hash(self, hash):
"""
# TODO: curently we strip build dependencies by default. Rethink
# this when we move to using package hashing on all specs.
yaml_text = syaml.dump(
self.to_node_dict(hash=hash), default_flow_style=True)
node_dict = self.to_node_dict(hash=hash)
yaml_text = syaml.dump(node_dict, default_flow_style=True)
sha = hashlib.sha1(yaml_text.encode('utf-8'))
b32_hash = base64.b32encode(sha.digest()).lower()

View file

@ -2154,3 +2154,38 @@ def test_can_update_attributes_with_override(tmpdir):
# Check that an update does not raise
env('update', '-y', str(abspath.dirname))
@pytest.mark.regression('18338')
def test_newline_in_commented_sequence_is_not_an_issue(tmpdir):
spack_yaml = """
spack:
specs:
- dyninst
packages:
libelf:
externals:
- spec: libelf@0.8.13
modules:
- libelf/3.18.1
concretization: together
"""
abspath = tmpdir.join('spack.yaml')
abspath.write(spack_yaml)
def extract_build_hash(environment):
_, dyninst = next(iter(environment.specs_by_hash.items()))
return dyninst['libelf'].build_hash()
# Concretize a first time and create a lockfile
with ev.Environment(str(tmpdir)) as e:
concretize()
libelf_first_hash = extract_build_hash(e)
# Check that a second run won't error
with ev.Environment(str(tmpdir)) as e:
concretize()
libelf_second_hash = extract_build_hash(e)
assert libelf_first_hash == libelf_second_hash