From 2d144316a80eab469be3a851d5094f0a3da35e4a Mon Sep 17 00:00:00 2001
From: "Adam J. Stewart"
Date: Mon, 22 Jul 2019 15:05:55 -0500
Subject: [PATCH] Fix version scraping for CRAN packages (#12021)

* Fix version scraping for CRAN packages

* Remove set literals
---
 lib/spack/spack/url.py      | 14 ++++++++++------
 lib/spack/spack/util/web.py | 26 ++++++++++++--------------
 2 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/lib/spack/spack/url.py b/lib/spack/spack/url.py
index ac1321e66a..11b289a0b1 100644
--- a/lib/spack/spack/url.py
+++ b/lib/spack/spack/url.py
@@ -43,8 +43,8 @@
 # work on paths and URLs. There's not a good word for both of these, but
 # "path" seemed like the most generic term.
 #
-def find_list_url(url):
-    r"""Finds a good list URL for the supplied URL.
+def find_list_urls(url):
+    r"""Find good list URLs for the supplied URL.
 
     By default, returns the dirname of the archive path.
 
@@ -62,7 +62,7 @@ def find_list_url(url):
         url (str): The download URL for the package
 
     Returns:
-        str: The list URL for the package
+        set: One or more list URLs for the package
     """
 
     url_types = [
@@ -93,12 +93,14 @@ def find_list_url(url):
          lambda m: m.group(1) + '/Archive/' + m.group(2)),
     ]
 
+    list_urls = set([os.path.dirname(url)])
+
     for pattern, fun in url_types:
         match = re.search(pattern, url)
         if match:
-            return fun(match)
-        else:
-            return os.path.dirname(url)
+            list_urls.add(fun(match))
+
+    return list_urls
 
 
 def strip_query_and_fragment(path):
diff --git a/lib/spack/spack/util/web.py b/lib/spack/spack/util/web.py
index 99078b203a..da2d5bbeb9 100644
--- a/lib/spack/spack/util/web.py
+++ b/lib/spack/spack/util/web.py
@@ -270,20 +270,18 @@ def find_versions_of_archive(archive_urls, list_url=None, list_depth=0):
     """Scrape web pages for new versions of a tarball.
 
     Arguments:
-        archive_urls:
-            URL or sequence of URLs for different versions of a
-            package. Typically these are just the tarballs from the package
-            file itself. By default, this searches the parent directories
-            of archives.
+        archive_urls (str or list or tuple): URL or sequence of URLs for
+            different versions of a package. Typically these are just the
+            tarballs from the package file itself. By default, this searches
+            the parent directories of archives.
 
     Keyword Arguments:
-        list_url:
-            URL for a listing of archives. Spack wills scrape these
-            pages for download links that look like the archive URL.
-
-        list_depth:
-            Max depth to follow links on list_url pages. Default 0.
+        list_url (str or None): URL for a listing of archives.
+            Spack will scrape these pages for download links that look
+            like the archive URL.
+        list_depth (int): Max depth to follow links on list_url pages.
+            Defaults to 0.
 
     """
     if not isinstance(archive_urls, (list, tuple)):
         archive_urls = [archive_urls]
@@ -291,17 +289,17 @@ def find_versions_of_archive(archive_urls, list_url=None, list_depth=0):
     # Generate a list of list_urls based on archive urls and any
     # explicitly listed list_url in the package
     list_urls = set()
-    if list_url:
+    if list_url is not None:
         list_urls.add(list_url)
     for aurl in archive_urls:
-        list_urls.add(spack.url.find_list_url(aurl))
+        list_urls |= spack.url.find_list_urls(aurl)
 
     # Add '/' to the end of the URL. Some web servers require this.
     additional_list_urls = set()
     for lurl in list_urls:
         if not lurl.endswith('/'):
             additional_list_urls.add(lurl + '/')
-    list_urls.update(additional_list_urls)
+    list_urls |= additional_list_urls
 
     # Grab some web pages to scrape.
     pages = {}
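
Note: the behavioral change in find_list_urls() is easiest to see in
isolation. The sketch below is not Spack's actual url_types table (the
real one lives in lib/spack/spack/url.py and has several entries); the
CRAN-style pattern and the example URL are illustrative stand-ins that
mirror the '/Archive/' rewrite visible in the hunk context above.

    import os
    import re

    # Hypothetical stand-in for Spack's url_types table: (regex, rewrite)
    # pairs mapping a download URL to a page listing other versions.
    url_types = [
        (r'(.*\.r-project\.org/src/contrib)/([^_/]+)_[^/]+$',
         lambda m: m.group(1) + '/Archive/' + m.group(2)),
    ]

    def find_list_url_old(url):
        # Pre-patch control flow: the if/else returns on the *first*
        # iteration, so patterns after the first were never consulted,
        # and at most one list URL could ever be produced.
        for pattern, fun in url_types:
            match = re.search(pattern, url)
            if match:
                return fun(match)
            else:
                return os.path.dirname(url)

    def find_list_urls_new(url):
        # Post-patch control flow: always include the parent directory,
        # then add every pattern-derived listing page.
        list_urls = set([os.path.dirname(url)])
        for pattern, fun in url_types:
            match = re.search(pattern, url)
            if match:
                list_urls.add(fun(match))
        return list_urls

    url = 'https://cran.r-project.org/src/contrib/abind_1.4-5.tar.gz'
    print(find_list_url_old(url))
    # -> https://cran.r-project.org/src/contrib/Archive/abind
    print(sorted(find_list_urls_new(url)))
    # -> ['https://cran.r-project.org/src/contrib',
    #     'https://cran.r-project.org/src/contrib/Archive/abind']

This matters for CRAN because the latest release sits in src/contrib/
while older releases live under src/contrib/Archive/<package>/, so both
pages must be scraped to see every version. It also explains the caller
change in find_versions_of_archive(): list_urls.add(...) becomes
list_urls |= ..., since the helper now returns a set of URLs rather
than a single string.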