Fix version scraping for CRAN packages (#12021)
* Fix version scraping for CRAN packages
* Remove set literals
parent cc4094bb9b
commit 2d144316a8

2 changed files with 20 additions and 20 deletions
@@ -43,8 +43,8 @@
 # work on paths and URLs. There's not a good word for both of these, but
 # "path" seemed like the most generic term.
 #
-def find_list_url(url):
-    r"""Finds a good list URL for the supplied URL.
+def find_list_urls(url):
+    r"""Find good list URLs for the supplied URL.

     By default, returns the dirname of the archive path.

@@ -62,7 +62,7 @@ def find_list_url(url):
         url (str): The download URL for the package

     Returns:
-        str: The list URL for the package
+        set: One or more list URLs for the package
     """

     url_types = [
@@ -93,12 +93,14 @@ def find_list_url(url):
          lambda m: m.group(1) + '/Archive/' + m.group(2)),
     ]

+    list_urls = set([os.path.dirname(url)])
+
     for pattern, fun in url_types:
         match = re.search(pattern, url)
         if match:
-            return fun(match)
-    else:
-        return os.path.dirname(url)
+            list_urls.add(fun(match))
+
+    return list_urls


 def strip_query_and_fragment(path):
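The old for/else returned on the first matching pattern (or fell back to the
dirname alone), so a CRAN tarball URL only ever produced one listing page and
releases that had moved into CRAN's Archive/ directory were never scraped.
The new shape collects every candidate. A minimal standalone sketch of that
behavior, assuming a single illustrative CRAN regex: the real url_types table
in spack.url carries more entries, and only the lambda's shape is taken from
the hunk above.

import os
import re

# Hypothetical pattern for illustration; Spack's real table differs.
url_types = [
    # CRAN keeps superseded releases under src/contrib/Archive/<pkg>/.
    (r'^(.*/src/contrib)/([^/]+)_[^/]+$',
     lambda m: m.group(1) + '/Archive/' + m.group(2)),
]


def find_list_urls(url):
    # Start from the dirname, then add every matching listing page.
    list_urls = set([os.path.dirname(url)])
    for pattern, fun in url_types:
        match = re.search(pattern, url)
        if match:
            list_urls.add(fun(match))
    return list_urls


print(sorted(find_list_urls(
    'https://cloud.r-project.org/src/contrib/Rcpp_1.0.1.tar.gz')))
# ['https://cloud.r-project.org/src/contrib',
#  'https://cloud.r-project.org/src/contrib/Archive/Rcpp']

Note the spelled-out set([...]) rather than a {...} literal, which matches the
"Remove set literals" note in the commit message.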
@@ -270,20 +270,18 @@ def find_versions_of_archive(archive_urls, list_url=None, list_depth=0):
     """Scrape web pages for new versions of a tarball.

     Arguments:
-      archive_urls:
-          URL or sequence of URLs for different versions of a
-          package. Typically these are just the tarballs from the package
-          file itself. By default, this searches the parent directories
-          of archives.
+        archive_urls (str or list or tuple): URL or sequence of URLs for
+            different versions of a package. Typically these are just the
+            tarballs from the package file itself. By default, this searches
+            the parent directories of archives.

     Keyword Arguments:
-      list_url:
-          URL for a listing of archives. Spack wills scrape these
-          pages for download links that look like the archive URL.
-
-      list_depth:
-          Max depth to follow links on list_url pages. Default 0.
-
+        list_url (str or None): URL for a listing of archives.
+            Spack will scrape these pages for download links that look
+            like the archive URL.
+
+        list_depth (int): Max depth to follow links on list_url pages.
+            Defaults to 0.
     """
     if not isinstance(archive_urls, (list, tuple)):
         archive_urls = [archive_urls]
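The reworked docstring pins down the accepted call shapes. A hedged usage
sketch follows; the module path spack.util.web is an assumption (the hunk
below only shows that the function is defined outside spack.url), and the
URLs and version numbers are invented.

import spack.util.web as web  # assumed home of find_versions_of_archive

# One URL: by default its parent directory is scraped for versions.
web.find_versions_of_archive(
    'https://cloud.r-project.org/src/contrib/Rcpp_1.0.1.tar.gz')

# Several URLs plus an explicit listing page, followed one level deep.
web.find_versions_of_archive(
    ['https://example.com/pkg-1.0.tar.gz',
     'https://example.com/pkg-1.1.tar.gz'],
    list_url='https://example.com/downloads/',
    list_depth=1)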
@@ -291,17 +289,17 @@ def find_versions_of_archive(archive_urls, list_url=None, list_depth=0):
     # Generate a list of list_urls based on archive urls and any
     # explicitly listed list_url in the package
     list_urls = set()
-    if list_url:
+    if list_url is not None:
         list_urls.add(list_url)
     for aurl in archive_urls:
-        list_urls.add(spack.url.find_list_url(aurl))
+        list_urls |= spack.url.find_list_urls(aurl)

     # Add '/' to the end of the URL. Some web servers require this.
     additional_list_urls = set()
     for lurl in list_urls:
         if not lurl.endswith('/'):
             additional_list_urls.add(lurl + '/')
-    list_urls.update(additional_list_urls)
+    list_urls |= additional_list_urls

     # Grab some web pages to scrape.
     pages = {}
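The switch from list_urls.add(...) to list_urls |= ... is forced by the new
return type: find_list_urls now hands back a set, and set.add would try to
insert that whole set as a single element, which fails because sets are
unhashable. A quick illustration with made-up URLs:

found = set(['https://a.example/pkg', 'https://a.example/Archive/pkg'])

list_urls = set()
list_urls |= found       # in-place union: both URLs are merged in

list_urls.add(found)     # raises TypeError: unhashable type: 'set'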