Checksum match (#28989)
* cmd/checksum: prefer url matching url_for_version

  This is a minimal change toward getting the right archive from places like
  GitHub. The heuristic is:

  * if an archive url exists, take its version
  * generate a url from the package with pkg.url_for_version
  * if they match, stop considering other URLs for this version
  * otherwise, continue replacing the url for the version

  I doubt this will always work, but it should address a variety of versions of
  this bug. A good test right now is `spack checksum gh`, which checksums macOS
  binaries without this, and the correct source packages with it.

  fixes #15985
  related to #14129
  related to #13940

* add heuristics to help create as well

  Since create can't rely on an existing package, this commit adds another pair
  of heuristics:

  1. if the current version is a specifically listed archive, don't replace it
  2. if the current url matches the result of applying
     `spack.url.substitute_version(a, ver)` for any a in archive_urls, prefer it
     and don't replace it

  fixes #13940

* clean up style and a lingering debug import
* ok flake8, you got me
* document reference_package argument
* Update lib/spack/spack/util/web.py

  Co-authored-by: Adam J. Stewart <ajstewart426@gmail.com>

* try to appease sphinx

Co-authored-by: Adam J. Stewart <ajstewart426@gmail.com>
parent 535262844b
commit a9ba40164a

2 changed files with 25 additions and 2 deletions
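The commit-message heuristic boils down to one decision: keep a URL for a version, and stop replacing it, once that URL is a listed archive, the package's own generated URL, or an extrapolation of a known archive URL. The sketch below is illustrative only: `version_is_locked` and its signature are hypothetical stand-ins, and the `url_for_version` and `substitute_version` callables stand in for `Package.url_for_version` and `spack.url.substitute_version`; the real logic lives inline in `find_versions_of_archive`, as shown in the diff below.

```python
# Hypothetical sketch of the matching heuristic; the actual change adds this
# logic inline to find_versions_of_archive (see the diff below).
from typing import Callable, Iterable, Optional


def version_is_locked(
    url: str,
    ver: str,
    archive_urls: Iterable[str],
    url_for_version: Optional[Callable[[str], str]] = None,
    substitute_version: Callable[[str, str], str] = lambda u, v: u,
) -> bool:
    """Return True if `url` should be kept for `ver` and never overwritten."""
    archive_urls = list(archive_urls)
    if url in archive_urls:
        # a specifically listed archive always wins
        return True
    if url_for_version is not None:
        # with a reference package, prefer the URL the package itself generates
        return url == url_for_version(ver)
    # otherwise, prefer URLs reachable by substituting this version into any
    # known archive URL (stand-in for spack.url.substitute_version)
    return url in {substitute_version(u, ver) for u in archive_urls}
```

In the diff below, a version whose URL passes this kind of test is added to a `matched` set so that links found later via `list_url` cannot overwrite it.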
lib/spack/spack/package.py

@@ -2564,7 +2564,11 @@ def fetch_remote_versions(self, concurrency=128):
         try:
             return spack.util.web.find_versions_of_archive(
-                self.all_urls, self.list_url, self.list_depth, concurrency
+                self.all_urls,
+                self.list_url,
+                self.list_depth,
+                concurrency,
+                reference_package=self,
             )
         except spack.util.web.NoNetworkConnectionError as e:
             tty.die("Package.fetch_versions couldn't connect to:", e.url,
lib/spack/spack/util/web.py

@@ -562,7 +562,7 @@ def _urlopen(req, *args, **kwargs):
 
 
 def find_versions_of_archive(
-    archive_urls, list_url=None, list_depth=0, concurrency=32
+    archive_urls, list_url=None, list_depth=0, concurrency=32, reference_package=None
 ):
     """Scrape web pages for new versions of a tarball.
 
@@ -577,6 +577,10 @@ def find_versions_of_archive(
         list_depth (int): max depth to follow links on list_url pages.
             Defaults to 0.
         concurrency (int): maximum number of concurrent requests
+        reference_package (spack.package.Package or None): a spack package
+            used as a reference for url detection. Uses the url_for_version
+            method on the package to produce reference urls which, if found,
+            are preferred.
     """
     if not isinstance(archive_urls, (list, tuple)):
         archive_urls = [archive_urls]
@@ -638,11 +642,26 @@ def find_versions_of_archive(
     # Walk through archive_url links first.
     # Any conflicting versions will be overwritten by the list_url links.
     versions = {}
+    matched = set()
     for url in archive_urls + sorted(links):
         if any(re.search(r, url) for r in regexes):
             try:
                 ver = spack.url.parse_version(url)
+                if ver in matched:
+                    continue
                 versions[ver] = url
+                # prevent this version from getting overwritten
+                if url in archive_urls:
+                    matched.add(ver)
+                elif reference_package is not None:
+                    if url == reference_package.url_for_version(ver):
+                        matched.add(ver)
+                else:
+                    extrapolated_urls = [
+                        spack.url.substitute_version(u, ver) for u in archive_urls
+                    ]
+                    if url in extrapolated_urls:
+                        matched.add(ver)
             except spack.url.UndetectableVersionError:
                 continue
 