bugfix: hashes should use ordered dictionaries (#14390)
Despite trying very hard to keep dicts out of our hash algorithm, we seem to still accidentally add them in ways that the tests can't catch. This can cause errors when hashes are not computed deterministically. This fixes an error we saw with Python 3.5, where dictionary iteration order is random. In this instance, we saw a bug when reading Spack environment lockfiles -- The load would fail like this: ``` ... File "/sw/spack/lib/spack/spack/environment.py", line 1249, in concretized_specs yield (s, self.specs_by_hash[h]) KeyError: 'qcttqplkwgxzjlycbs4rfxxladnt423p' ``` This was because the hashes differed depending on whether we wrote `path` or `module` first when recomputing the build hash as part of reading a Spack lockfile. We can fix it by ensuring a determistic iteration order. - [x] Fix two places (one that caused an issue, and one that did not... yet) where our to_node_dict-like methods were using regular python dicts. - [x] Also add a check that statically analyzes our to_node_dict functions and flags any that use Python dicts. The test found the two errors fixed here, specifically: ``` E AssertionError: assert [] == ['Use syaml_dict instead of ...pack/spack/spec.py:1495:28'] E Right contains more items, first extra item: 'Use syaml_dict instead of dict at /Users/gamblin2/src/spack/lib/spack/spack/spec.py:1495:28' E Full diff: E - [] E + ['Use syaml_dict instead of dict at ' E + '/Users/gamblin2/src/spack/lib/spack/spack/spec.py:1495:28'] ``` and ``` E AssertionError: assert [] == ['Use syaml_dict instead of ...ack/architecture.py:359:15'] E Right contains more items, first extra item: 'Use syaml_dict instead of dict at /Users/gamblin2/src/spack/lib/spack/spack/architecture.py:359:15' E Full diff: E - [] E + ['Use syaml_dict instead of dict at ' E + '/Users/gamblin2/src/spack/lib/spack/spack/architecture.py:359:15'] ```
This commit is contained in:
parent
8283d87f6a
commit
2eadfa24e9
3 changed files with 81 additions and 8 deletions
|
@ -356,10 +356,10 @@ def _cmp_key(self):
|
||||||
return (self.name, self.version)
|
return (self.name, self.version)
|
||||||
|
|
||||||
def to_dict(self):
|
def to_dict(self):
|
||||||
return {
|
return syaml_dict([
|
||||||
'name': self.name,
|
('name', self.name),
|
||||||
'version': self.version
|
('version', self.version)
|
||||||
}
|
])
|
||||||
|
|
||||||
|
|
||||||
@key_ordering
|
@key_ordering
|
||||||
|
|
|
@ -1492,10 +1492,10 @@ def to_node_dict(self, hash=ht.dag_hash):
|
||||||
d['parameters'] = params
|
d['parameters'] = params
|
||||||
|
|
||||||
if self.external:
|
if self.external:
|
||||||
d['external'] = {
|
d['external'] = syaml.syaml_dict([
|
||||||
'path': self.external_path,
|
('path', self.external_path),
|
||||||
'module': self.external_module
|
('module', self.external_module),
|
||||||
}
|
])
|
||||||
|
|
||||||
if not self._concrete:
|
if not self._concrete:
|
||||||
d['concrete'] = False
|
d['concrete'] = False
|
||||||
|
|
|
@ -8,13 +8,20 @@
|
||||||
YAML format preserves DAG information in the spec.
|
YAML format preserves DAG information in the spec.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
import ast
|
||||||
|
import inspect
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from collections import Iterable, Mapping
|
from collections import Iterable, Mapping
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
import spack.architecture
|
||||||
import spack.hash_types as ht
|
import spack.hash_types as ht
|
||||||
|
import spack.spec
|
||||||
import spack.util.spack_json as sjson
|
import spack.util.spack_json as sjson
|
||||||
import spack.util.spack_yaml as syaml
|
import spack.util.spack_yaml as syaml
|
||||||
|
import spack.version
|
||||||
|
|
||||||
from spack import repo
|
from spack import repo
|
||||||
from spack.spec import Spec, save_dependency_spec_yamls
|
from spack.spec import Spec, save_dependency_spec_yamls
|
||||||
|
@ -204,6 +211,72 @@ def test_ordered_read_not_required_for_consistent_dag_hash(
|
||||||
assert spec.full_hash() == round_trip_reversed_json_spec.full_hash()
|
assert spec.full_hash() == round_trip_reversed_json_spec.full_hash()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("module", [
|
||||||
|
spack.spec,
|
||||||
|
spack.architecture,
|
||||||
|
spack.version,
|
||||||
|
])
|
||||||
|
def test_hashes_use_no_python_dicts(module):
|
||||||
|
"""Coarse check to make sure we don't use dicts in Spec.to_node_dict().
|
||||||
|
|
||||||
|
Python dicts are not guaranteed to iterate in a deterministic order
|
||||||
|
(at least not in all python versions) so we need to use lists and
|
||||||
|
syaml_dicts. syaml_dicts are ordered and ensure that hashes in Spack
|
||||||
|
are deterministic.
|
||||||
|
|
||||||
|
This test is intended to handle cases that are not covered by the
|
||||||
|
consistency checks above, or that would be missed by a dynamic check.
|
||||||
|
This test traverses the ASTs of functions that are used in our hash
|
||||||
|
algorithms, finds instances of dictionaries being constructed, and
|
||||||
|
prints out the line numbers where they occur.
|
||||||
|
|
||||||
|
"""
|
||||||
|
class FindFunctions(ast.NodeVisitor):
|
||||||
|
"""Find a function definition called to_node_dict."""
|
||||||
|
def __init__(self):
|
||||||
|
self.nodes = []
|
||||||
|
|
||||||
|
def visit_FunctionDef(self, node): # noqa
|
||||||
|
if node.name in ("to_node_dict", "to_dict", "to_dict_or_value"):
|
||||||
|
self.nodes.append(node)
|
||||||
|
|
||||||
|
class FindDicts(ast.NodeVisitor):
|
||||||
|
"""Find source locations of dicts in an AST."""
|
||||||
|
def __init__(self, filename):
|
||||||
|
self.nodes = []
|
||||||
|
self.filename = filename
|
||||||
|
|
||||||
|
def add_error(self, node):
|
||||||
|
self.nodes.append(
|
||||||
|
"Use syaml_dict instead of dict at %s:%s:%s"
|
||||||
|
% (self.filename, node.lineno, node.col_offset)
|
||||||
|
)
|
||||||
|
|
||||||
|
def visit_Dict(self, node): # noqa
|
||||||
|
self.add_error(node)
|
||||||
|
|
||||||
|
def visit_Call(self, node): # noqa
|
||||||
|
name = None
|
||||||
|
if isinstance(node.func, ast.Name):
|
||||||
|
name = node.func.id
|
||||||
|
elif isinstance(node.func, ast.Attribute):
|
||||||
|
name = node.func.attr
|
||||||
|
|
||||||
|
if name == 'dict':
|
||||||
|
self.add_error(node)
|
||||||
|
|
||||||
|
find_functions = FindFunctions()
|
||||||
|
module_ast = ast.parse(inspect.getsource(module))
|
||||||
|
find_functions.visit(module_ast)
|
||||||
|
|
||||||
|
find_dicts = FindDicts(module.__file__)
|
||||||
|
for node in find_functions.nodes:
|
||||||
|
find_dicts.visit(node)
|
||||||
|
|
||||||
|
# fail with offending lines if we found some dicts.
|
||||||
|
assert [] == find_dicts.nodes
|
||||||
|
|
||||||
|
|
||||||
def reverse_all_dicts(data):
|
def reverse_all_dicts(data):
|
||||||
"""Descend into data and reverse all the dictionaries"""
|
"""Descend into data and reverse all the dictionaries"""
|
||||||
if isinstance(data, dict):
|
if isinstance(data, dict):
|
||||||
|
|
Loading…
Reference in a new issue