Checksum match (#28989)

* cmd/checksum: prefer url matching url_for_version

This is a minimal change toward getting the right archive from places
like GitHub.  The heuristic, sketched below, is:

* if an archive url exists, take its version
* generate a url from the package with pkg.url_for_version
* if the two urls match, stop considering other URLs for this version
* otherwise, continue replacing the url for the version
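
Roughly, the idea in Python (a sketch only, not the actual checksum code;
`pick_url`, `pkg`, `ver`, and `candidate_urls` are illustrative names, while
the real change lives in `find_versions_of_archive`, shown in the diff below):

```python
def pick_url(pkg, ver, candidate_urls, current_url=None):
    """Prefer the candidate that matches the package's own generated url."""
    reference = pkg.url_for_version(ver)
    chosen = current_url
    for url in candidate_urls:
        chosen = url
        if url == reference:
            # exact match with the generated url: stop considering other urls
            break
    return chosen
```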

I doubt this will always work, but it should address a variety of
instances of this bug.  A good test right now is `spack checksum gh`,
which checksums macOS binaries without this change and the correct
source archives with it.

fixes #15985
related to #14129
related to #13940

* add heuristics to help `spack create` as well

Since create can't rely on an existing package, this commit adds another
pair of heuristics (sketched below):
1. if the url for the current version is one of the explicitly listed
   archive urls, don't replace it
2. if the current url matches the result of applying
   `spack.url.substitute_version(a, ver)` for any a in archive_urls,
   prefer it and don't replace it
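
A minimal sketch of that pair of checks, reusing `spack.url.substitute_version`
as the diff below does (`keep_current_url` and its arguments are illustrative
names, not the actual create code):

```python
import spack.url

def keep_current_url(current_url, ver, archive_urls):
    # 1. a url that is explicitly listed as an archive is never replaced
    if current_url in archive_urls:
        return True
    # 2. a url that a listed archive extrapolates to for this version is preferred
    extrapolated = [spack.url.substitute_version(a, ver) for a in archive_urls]
    return current_url in extrapolated
```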

fixes #13940

* clean up style and a lingering debug import

* ok flake8, you got me

* document reference_package argument

* Update lib/spack/spack/util/web.py

Co-authored-by: Adam J. Stewart <ajstewart426@gmail.com>

* try to appease sphinx

Co-authored-by: Adam J. Stewart <ajstewart426@gmail.com>
Tom Scogland, 2022-02-22 16:55:59 -08:00, committed by GitHub
parent 535262844b
commit a9ba40164a
2 changed files with 25 additions and 2 deletions

lib/spack/spack/package.py

@@ -2564,7 +2564,11 @@ def fetch_remote_versions(self, concurrency=128):
         try:
             return spack.util.web.find_versions_of_archive(
-                self.all_urls, self.list_url, self.list_depth, concurrency
+                self.all_urls,
+                self.list_url,
+                self.list_depth,
+                concurrency,
+                reference_package=self,
             )
         except spack.util.web.NoNetworkConnectionError as e:
             tty.die("Package.fetch_versions couldn't connect to:", e.url,

lib/spack/spack/util/web.py

@@ -562,7 +562,7 @@ def _urlopen(req, *args, **kwargs):
 def find_versions_of_archive(
-    archive_urls, list_url=None, list_depth=0, concurrency=32
+    archive_urls, list_url=None, list_depth=0, concurrency=32, reference_package=None
 ):
     """Scrape web pages for new versions of a tarball.
@@ -577,6 +577,10 @@ find_versions_of_archive(
         list_depth (int): max depth to follow links on list_url pages.
             Defaults to 0.
         concurrency (int): maximum number of concurrent requests
+        reference_package (spack.package.Package or None): a spack package
+            used as a reference for url detection. Uses the url_for_version
+            method on the package to produce reference urls which, if found,
+            are preferred.
     """
     if not isinstance(archive_urls, (list, tuple)):
         archive_urls = [archive_urls]
@@ -638,11 +642,26 @@ find_versions_of_archive(
     # Walk through archive_url links first.
     # Any conflicting versions will be overwritten by the list_url links.
     versions = {}
+    matched = set()
     for url in archive_urls + sorted(links):
         if any(re.search(r, url) for r in regexes):
             try:
                 ver = spack.url.parse_version(url)
+                if ver in matched:
+                    continue
                 versions[ver] = url
+                # prevent this version from getting overwritten
+                if url in archive_urls:
+                    matched.add(ver)
+                elif reference_package is not None:
+                    if url == reference_package.url_for_version(ver):
+                        matched.add(ver)
+                else:
+                    extrapolated_urls = [
+                        spack.url.substitute_version(u, ver) for u in archive_urls
+                    ]
+                    if url in extrapolated_urls:
+                        matched.add(ver)
             except spack.url.UndetectableVersionError:
                 continue
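
For reference, a minimal sketch of how a caller exercises the new parameter,
mirroring the `fetch_remote_versions` change above (`pkg` is assumed to be an
already-constructed package object; obtaining it is elided):

```python
import spack.util.web

# pkg: a spack.package.Package instance obtained elsewhere
versions = spack.util.web.find_versions_of_archive(
    pkg.all_urls,
    pkg.list_url,
    pkg.list_depth,
    reference_package=pkg,  # urls matching pkg.url_for_version(ver) are preferred
)
```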