Speed-up version parsing and comparison (#22973)
The VALID_VERSION regex didn't check that the version string was completely valid, only that a prefix of it was. This version ensures the entire string represents a valid version. This makes a few related changes. 1. Make the SEGMENT_REGEX identify *which* arm it matches by what groups are populated, including whether it's a string or int component or a separator all at once. 2. Use the updated regex to parse the input once with a findall rather than twice, once with findall and once with split, since the version components and separators can be distinguished by their group status. 3. Rather than "convert to int, on exception stay string," if the int group is set then convert to int, if not then construct an instance of the VersionStrComponent class 4. VersionStrComponent now implements all of the special string comparison logic as part of its __lt__ and __eq__ methods to deal with infinity versions and also overloads comparison with integers. 5. Version now uses direct tuple comparison since it has no per-element special logic outside the VersionStrComponent class. Co-authored-by: Massimiliano Culpo <massimiliano.culpo@gmail.com>
This commit is contained in:
parent
ea390198f4
commit
b63a8b3e27
2 changed files with 84 additions and 52 deletions
|
@ -565,3 +565,14 @@ def test_list_highest():
|
||||||
assert vl2.highest_numeric() is None
|
assert vl2.highest_numeric() is None
|
||||||
assert vl2.preferred() == Version('develop')
|
assert vl2.preferred() == Version('develop')
|
||||||
assert vl2.lowest() == Version('master')
|
assert vl2.lowest() == Version('master')
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('version_str', [
|
||||||
|
"foo 1.2.0",
|
||||||
|
"!",
|
||||||
|
"1!2"
|
||||||
|
])
|
||||||
|
def test_invalid_versions(version_str):
|
||||||
|
"""Ensure invalid versions are rejected with a ValueError"""
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
Version(version_str)
|
||||||
|
|
|
@ -37,21 +37,15 @@
|
||||||
__all__ = ['Version', 'VersionRange', 'VersionList', 'ver']
|
__all__ = ['Version', 'VersionRange', 'VersionList', 'ver']
|
||||||
|
|
||||||
# Valid version characters
|
# Valid version characters
|
||||||
VALID_VERSION = re.compile(r'[A-Za-z0-9_.-]')
|
VALID_VERSION = re.compile(r'^[A-Za-z0-9_.-]+$')
|
||||||
|
|
||||||
# regex for version segments
|
# regex for version segments
|
||||||
SEGMENT_REGEX = re.compile(r'[a-zA-Z]+|[0-9]+')
|
SEGMENT_REGEX = re.compile(r'(?:(?P<num>[0-9]+)|(?P<str>[a-zA-Z]+))(?P<sep>[_.-]*)')
|
||||||
|
|
||||||
# Infinity-like versions. The order in the list implies the comparison rules
|
# Infinity-like versions. The order in the list implies the comparison rules
|
||||||
infinity_versions = ['develop', 'main', 'master', 'head', 'trunk']
|
infinity_versions = ['develop', 'main', 'master', 'head', 'trunk']
|
||||||
|
|
||||||
|
iv_min_len = min(len(s) for s in infinity_versions)
|
||||||
def int_if_int(string):
|
|
||||||
"""Convert a string to int if possible. Otherwise, return a string."""
|
|
||||||
try:
|
|
||||||
return int(string)
|
|
||||||
except ValueError:
|
|
||||||
return string
|
|
||||||
|
|
||||||
|
|
||||||
def coerce_versions(a, b):
|
def coerce_versions(a, b):
|
||||||
|
@ -96,26 +90,86 @@ def coercing_method(a, b, *args, **kwargs):
|
||||||
return coercing_method
|
return coercing_method
|
||||||
|
|
||||||
|
|
||||||
|
class VersionStrComponent(object):
|
||||||
|
# NOTE: this is intentionally not a UserString, the abc instanceof
|
||||||
|
# check is slow enough to eliminate all gains
|
||||||
|
__slots__ = ['inf_ver', 'data']
|
||||||
|
|
||||||
|
def __init__(self, string):
|
||||||
|
self.inf_ver = None
|
||||||
|
self.data = string
|
||||||
|
if len(string) >= iv_min_len:
|
||||||
|
try:
|
||||||
|
self.inf_ver = infinity_versions.index(string)
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def __hash__(self):
|
||||||
|
return hash(self.data)
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return self.data
|
||||||
|
|
||||||
|
def __eq__(self, other):
|
||||||
|
if isinstance(other, VersionStrComponent):
|
||||||
|
return self.data == other.data
|
||||||
|
return self.data == other
|
||||||
|
|
||||||
|
def __lt__(self, other):
|
||||||
|
if isinstance(other, VersionStrComponent):
|
||||||
|
if self.inf_ver is not None:
|
||||||
|
if other.inf_ver is not None:
|
||||||
|
return self.inf_ver > other.inf_ver
|
||||||
|
return False
|
||||||
|
if other.inf_ver is not None:
|
||||||
|
return True
|
||||||
|
|
||||||
|
return self.data < other.data
|
||||||
|
|
||||||
|
if self.inf_ver is not None:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Numbers are always "newer" than letters.
|
||||||
|
# This is for consistency with RPM. See patch
|
||||||
|
# #60884 (and details) from bugzilla #50977 in
|
||||||
|
# the RPM project at rpm.org. Or look at
|
||||||
|
# rpmvercmp.c if you want to see how this is
|
||||||
|
# implemented there.
|
||||||
|
if isinstance(other, int):
|
||||||
|
return True
|
||||||
|
|
||||||
|
if isinstance(other, str):
|
||||||
|
return self < VersionStrComponent(other)
|
||||||
|
# If we get here, it's an unsupported comparison
|
||||||
|
|
||||||
|
raise ValueError("VersionStrComponent can only be compared with itself, "
|
||||||
|
"int and str")
|
||||||
|
|
||||||
|
def __gt__(self, other):
|
||||||
|
return not self.__lt__(other)
|
||||||
|
|
||||||
|
|
||||||
class Version(object):
|
class Version(object):
|
||||||
"""Class to represent versions"""
|
"""Class to represent versions"""
|
||||||
|
__slots__ = ['version', 'separators', 'string']
|
||||||
|
|
||||||
def __init__(self, string):
|
def __init__(self, string):
|
||||||
if not isinstance(string, str):
|
if not isinstance(string, str):
|
||||||
string = str(string)
|
string = str(string)
|
||||||
|
|
||||||
if not VALID_VERSION.match(string):
|
|
||||||
raise ValueError("Bad characters in version string: %s" % string)
|
|
||||||
|
|
||||||
# preserve the original string, but trimmed.
|
# preserve the original string, but trimmed.
|
||||||
string = string.strip()
|
string = string.strip()
|
||||||
self.string = string
|
self.string = string
|
||||||
|
|
||||||
# Split version into alphabetical and numeric segments
|
if not VALID_VERSION.match(string):
|
||||||
segments = SEGMENT_REGEX.findall(string)
|
raise ValueError("Bad characters in version string: %s" % string)
|
||||||
self.version = tuple(int_if_int(seg) for seg in segments)
|
|
||||||
|
|
||||||
# Store the separators from the original version string as well.
|
# Split version into alphabetical and numeric segments simultaneously
|
||||||
self.separators = tuple(SEGMENT_REGEX.split(string)[1:])
|
segments = SEGMENT_REGEX.findall(string)
|
||||||
|
self.version = tuple(
|
||||||
|
int(m[0]) if m[0] else VersionStrComponent(m[1]) for m in segments
|
||||||
|
)
|
||||||
|
self.separators = tuple(m[2] for m in segments)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def dotted(self):
|
def dotted(self):
|
||||||
|
@ -277,40 +331,8 @@ def __lt__(self, other):
|
||||||
if other is None:
|
if other is None:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Coerce if other is not a Version
|
# Use tuple comparison assisted by VersionStrComponent for performance
|
||||||
# simple equality test first.
|
return self.version < other.version
|
||||||
if self.version == other.version:
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Standard comparison of two numeric versions
|
|
||||||
for a, b in zip(self.version, other.version):
|
|
||||||
if a == b:
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
if a in infinity_versions:
|
|
||||||
if b in infinity_versions:
|
|
||||||
return (infinity_versions.index(a) >
|
|
||||||
infinity_versions.index(b))
|
|
||||||
else:
|
|
||||||
return False
|
|
||||||
if b in infinity_versions:
|
|
||||||
return True
|
|
||||||
|
|
||||||
# Neither a nor b is infinity
|
|
||||||
# Numbers are always "newer" than letters.
|
|
||||||
# This is for consistency with RPM. See patch
|
|
||||||
# #60884 (and details) from bugzilla #50977 in
|
|
||||||
# the RPM project at rpm.org. Or look at
|
|
||||||
# rpmvercmp.c if you want to see how this is
|
|
||||||
# implemented there.
|
|
||||||
if type(a) != type(b):
|
|
||||||
return type(b) == int
|
|
||||||
else:
|
|
||||||
return a < b
|
|
||||||
|
|
||||||
# If the common prefix is equal, the one
|
|
||||||
# with more segments is bigger.
|
|
||||||
return len(self.version) < len(other.version)
|
|
||||||
|
|
||||||
@coerced
|
@coerced
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
|
@ -595,7 +617,6 @@ def __init__(self, vlist=None):
|
||||||
else:
|
else:
|
||||||
self.versions = [vlist]
|
self.versions = [vlist]
|
||||||
else:
|
else:
|
||||||
vlist = list(vlist)
|
|
||||||
for v in vlist:
|
for v in vlist:
|
||||||
self.add(ver(v))
|
self.add(ver(v))
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue