unparser: drop Python 2, fix testing bugs with newer Pythons (#38424)

The `unparser` that Spack uses for package hashing had several tweaks to ensure compatibility
with Python 2.7:
1. Currently, the unparser automatically moves `*` and `**` args to the end to preserve
   compatibility with `python@:3.4`
2. `print a, b, c` statements and single-tuple `print((a, b, c))` function calls were
   remapped to `print(a, b, c)` in the unparsed output for consistency across versions.

(1) is causing issues in our tests because a recent patch to the Python source code  
(https://github.com/python/cpython/pull/102953/files#diff-7972dffec6674d5f09410c71766ac6caacb95b9bccbf032061806ae304519c9bR813-R823) 
has a `**` arg before an named argument, and we round-trip the core python source code
as a test of our unparser.  This isn't actually a break with our consistent unpausing -- it's still
consistent, the python source just doesn't unparse to the same thing anymore. It does makes
it harder to test, so it's not worth maintaining the Python2-specific stuff anymore.

Since we only support `python@3.6:`, this PR removes (1) and (2) from the unparser, but keeps
one last tweak for unicode AST inconsistencies, as it's still needed for Python 3.5-3.7.

This fixes the CI error we've been seeing on `python@3.11.4` and `python@3.10.12`. Again, that
bug exists only in the test system and doesn't affect our canonical hashing of Python code.
This commit is contained in:
Greg Becker 2023-06-16 17:52:26 -07:00 committed by GitHub
parent a3c42715db
commit 0ac1c52d17
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 20 additions and 129 deletions

View file

@ -337,15 +337,15 @@ def test_remove_complex_package_logic_filtered():
("grads", "rrlmwml3f2frdnqavmro3ias66h5b2ce"), ("grads", "rrlmwml3f2frdnqavmro3ias66h5b2ce"),
("llvm", "nufffum5dabmaf4l5tpfcblnbfjknvd3"), ("llvm", "nufffum5dabmaf4l5tpfcblnbfjknvd3"),
# has @when("@4.1.0") and raw unicode literals # has @when("@4.1.0") and raw unicode literals
("mfem", "qtneutm6khd6epd2rhyuv2y6zavsxbed"), ("mfem", "lbhr43gm5zdye2yhqznucxb4sg6vhryl"),
("mfem@4.0.0", "qtneutm6khd6epd2rhyuv2y6zavsxbed"), ("mfem@4.0.0", "lbhr43gm5zdye2yhqznucxb4sg6vhryl"),
("mfem@4.1.0", "uit2ydzhra3b2mlvnq262qlrqqmuwq3d"), ("mfem@4.1.0", "vjdjdgjt6nyo7ited2seki5epggw5gza"),
# has @when("@1.5.0:") # has @when("@1.5.0:")
("py-torch", "qs7djgqn7dy7r3ps4g7hv2pjvjk4qkhd"), ("py-torch", "qs7djgqn7dy7r3ps4g7hv2pjvjk4qkhd"),
("py-torch@1.0", "qs7djgqn7dy7r3ps4g7hv2pjvjk4qkhd"), ("py-torch@1.0", "qs7djgqn7dy7r3ps4g7hv2pjvjk4qkhd"),
("py-torch@1.6", "p4ine4hc6f2ik2f2wyuwieslqbozll5w"), ("py-torch@1.6", "p4ine4hc6f2ik2f2wyuwieslqbozll5w"),
# has a print with multiple arguments # has a print with multiple arguments
("legion", "sffy6vz3dusxnxeetofoomlaieukygoj"), ("legion", "efpfd2c4pzhsbyc3o7plqcmtwm6b57yh"),
# has nested `with when()` blocks and loops # has nested `with when()` blocks and loops
("trilinos", "vqrgscjrla4hi7bllink7v6v6dwxgc2p"), ("trilinos", "vqrgscjrla4hi7bllink7v6v6dwxgc2p"),
], ],

View file

@ -87,28 +87,11 @@ def __init__(self, py_ver_consistent=False, _avoid_backslashes=False):
Arguments: Arguments:
py_ver_consistent (bool): if True, generate unparsed code that is py_ver_consistent (bool): if True, generate unparsed code that is
consistent between Python 2.7 and 3.5-3.10. consistent between Python versions 3.5-3.11.
Consistency is achieved by:
1. Ensuring that *args and **kwargs are always the last arguments,
regardless of the python version, because Python 2's AST does not
have sufficient information to reconstruct star-arg order.
2. Always unparsing print as a function.
3. Unparsing Python3 unicode literals the way Python 2 would.
Without these changes, the same source can generate different code for Python 2
and Python 3, depending on subtle AST differences. The first of these two
causes this module to behave differently from Python 3.8+'s `ast.unparse()`
One place where single source will generate an inconsistent AST is with
multi-argument print statements, e.g.::
print("foo", "bar", "baz")
In Python 2, this prints a tuple; in Python 3, it is the print function with
multiple arguments. Use ``from __future__ import print_function`` to avoid
this inconsistency.
For legacy reasons, consistency is achieved by unparsing Python3 unicode literals
the way Python 2 would. This preserved Spack package hash consistency during the
python2/3 transition
""" """
self.future_imports = [] self.future_imports = []
self._indent = 0 self._indent = 0
@ -299,61 +282,6 @@ def visit_Exec(self, node):
self.write(", ") self.write(", ")
self.dispatch(node.locals) self.dispatch(node.locals)
def visit_Print(self, node):
# Use print function so that python 2 unparsing is consistent with 3
if self._py_ver_consistent:
self.fill("print")
with self.delimit("(", ")"):
values = node.values
# Can't tell print(foo, bar, baz) and print((foo, bar, baz)) apart in
# python 2 and 3, so treat them the same to make hashes consistent.
# Single-tuple print are rare and unlikely to affect package hashes,
# esp. as they likely print to stdout.
if len(values) == 1 and isinstance(values[0], ast.Tuple):
values = node.values[0].elts
do_comma = False
for e in values:
if do_comma:
self.write(", ")
else:
do_comma = True
self.dispatch(e)
if not node.nl:
if do_comma:
self.write(", ")
else:
do_comma = True
self.write("end=''")
if node.dest:
if do_comma:
self.write(", ")
else:
do_comma = True
self.write("file=")
self.dispatch(node.dest)
else:
# unparse Python 2 print statements
self.fill("print ")
do_comma = False
if node.dest:
self.write(">>")
self.dispatch(node.dest)
do_comma = True
for e in node.values:
if do_comma:
self.write(", ")
else:
do_comma = True
self.dispatch(e)
if not node.nl:
self.write(",")
def visit_Global(self, node): def visit_Global(self, node):
self.fill("global ") self.fill("global ")
interleave(lambda: self.write(", "), self.write, node.names) interleave(lambda: self.write(", "), self.write, node.names)
@ -962,25 +890,16 @@ def visit_Call(self, node):
self.set_precedence(_Precedence.ATOM, node.func) self.set_precedence(_Precedence.ATOM, node.func)
args = node.args args = node.args
if self._py_ver_consistent:
# make print(a, b, c) and print((a, b, c)) equivalent, since you can't
# tell them apart between Python 2 and 3. See _Print() for more details.
if getattr(node.func, "id", None) == "print":
if len(node.args) == 1 and isinstance(node.args[0], ast.Tuple):
args = node.args[0].elts
self.dispatch(node.func) self.dispatch(node.func)
with self.delimit("(", ")"): with self.delimit("(", ")"):
comma = False comma = False
# starred arguments last in Python 3.5+, for consistency w/earlier versions # NOTE: this code is no longer compatible with python versions 2.7:3.4
star_and_kwargs = [] # If you run on python@:3.4, you will see instability in package hashes
move_stars_last = sys.version_info[:2] >= (3, 5) # across python versions
for e in args: for e in args:
if move_stars_last and isinstance(e, ast.Starred):
star_and_kwargs.append(e)
else:
if comma: if comma:
self.write(", ") self.write(", ")
else: else:
@ -988,40 +907,12 @@ def visit_Call(self, node):
self.dispatch(e) self.dispatch(e)
for e in node.keywords: for e in node.keywords:
# starting from Python 3.5 this denotes a kwargs part of the invocation
if e.arg is None and move_stars_last:
star_and_kwargs.append(e)
else:
if comma: if comma:
self.write(", ") self.write(", ")
else: else:
comma = True comma = True
self.dispatch(e) self.dispatch(e)
if move_stars_last:
for e in star_and_kwargs:
if comma:
self.write(", ")
else:
comma = True
self.dispatch(e)
if sys.version_info[:2] < (3, 5):
if node.starargs:
if comma:
self.write(", ")
else:
comma = True
self.write("*")
self.dispatch(node.starargs)
if node.kwargs:
if comma:
self.write(", ")
else:
comma = True
self.write("**")
self.dispatch(node.kwargs)
def visit_Subscript(self, node): def visit_Subscript(self, node):
self.set_precedence(_Precedence.ATOM, node.value) self.set_precedence(_Precedence.ATOM, node.value)
self.dispatch(node.value) self.dispatch(node.value)