Consolidate some web-spidering commands in spack.util.web
- move `spack.cmd.checksum.get_checksums` to `spack.util.web.get_checksums_for_versions`
- move `spack.error.NoNetworkConnectionError` to `spack.util.web.NoNetworkConnectionError`, since it is only used there
parent 2198a0e229
commit 94d85d842c

6 changed files with 118 additions and 116 deletions
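In short: the interactive checksum helper and the network error class now live in `spack.util.web`, next to the spidering code they depend on. As a hedged sketch of the consolidated entry point (the package name and URLs below are hypothetical; the real signature appears in the web.py hunk at the end of this diff):

```python
import spack.util.web

# Hypothetical input: a mapping from version to download URL, e.g. as
# returned by Package.fetch_remote_versions().
url_dict = {
    '2.1.0': 'https://example.com/foo-2.1.0.tar.gz',
    '2.0.0': 'https://example.com/foo-2.0.0.tar.gz',
}

# Interactively fetches the selected archives, md5-checksums them, and
# returns ready-to-paste version() directives for a package.py file.
version_lines = spack.util.web.get_checksums_for_versions(
    url_dict, 'foo', keep_stage=False)
print(version_lines)
```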
lib/spack/spack/cmd/checksum.py

```diff
@@ -25,13 +25,12 @@
 from __future__ import print_function
 
 import argparse
-import hashlib
 
 import llnl.util.tty as tty
 
 import spack
 import spack.cmd
 import spack.util.crypto
-from spack.stage import Stage, FailedDownloadError
+import spack.util.web
 from spack.util.naming import *
 from spack.version import *
```
```diff
@@ -52,90 +51,6 @@ def setup_parser(subparser):
         help='versions to generate checksums for')
 
 
-def get_checksums(url_dict, name, **kwargs):
-    """Fetches and checksums archives from URLs.
-
-    This function is called by both ``spack checksum`` and ``spack create``.
-    The ``first_stage_function`` kwarg allows ``spack create`` to determine
-    things like the build system of the archive.
-
-    Args:
-        url_dict (dict): A dictionary of the form: version -> URL
-        name (str): The name of the package
-        first_stage_function (callable): Function to run on first staging area
-        keep_stage (bool): Don't clean up staging area when command completes
-
-    Returns:
-        str: A multi-line string containing versions and corresponding hashes
-    """
-    first_stage_function = kwargs.get('first_stage_function', None)
-    keep_stage = kwargs.get('keep_stage', False)
-
-    sorted_versions = sorted(url_dict.keys(), reverse=True)
-
-    # Find length of longest string in the list for padding
-    max_len = max(len(str(v)) for v in sorted_versions)
-    num_ver = len(sorted_versions)
-
-    tty.msg("Found {0} version{1} of {2}:".format(
-            num_ver, '' if num_ver == 1 else 's', name),
-            "",
-            *spack.cmd.elide_list(
-                ["{0:{1}}  {2}".format(str(v), max_len, url_dict[v])
-                 for v in sorted_versions]))
-    print()
-
-    archives_to_fetch = tty.get_number(
-        "How many would you like to checksum?", default=1, abort='q')
-
-    if not archives_to_fetch:
-        tty.die("Aborted.")
-
-    versions = sorted_versions[:archives_to_fetch]
-    urls = [url_dict[v] for v in versions]
-
-    tty.msg("Downloading...")
-    version_hashes = []
-    i = 0
-    for url, version in zip(urls, versions):
-        try:
-            with Stage(url, keep=keep_stage) as stage:
-                # Fetch the archive
-                stage.fetch()
-                if i == 0 and first_stage_function:
-                    # Only run first_stage_function the first time,
-                    # no need to run it every time
-                    first_stage_function(stage, url)
-
-                # Checksum the archive and add it to the list
-                version_hashes.append((version, spack.util.crypto.checksum(
-                    hashlib.md5, stage.archive_file)))
-                i += 1
-        except FailedDownloadError:
-            tty.msg("Failed to fetch {0}".format(url))
-        except Exception as e:
-            tty.msg("Something failed on {0}, skipping.".format(url),
-                    "  ({0})".format(e))
-
-    if not version_hashes:
-        tty.die("Could not fetch any versions for {0}".format(name))
-
-    # Find length of longest string in the list for padding
-    max_len = max(len(str(v)) for v, h in version_hashes)
-
-    # Generate the version directives to put in a package.py
-    version_lines = "\n".join([
-        "    version('{0}', {1}'{2}')".format(
-            v, ' ' * (max_len - len(str(v))), h) for v, h in version_hashes
-    ])
-
-    num_hash = len(version_hashes)
-    tty.msg("Checksummed {0} version{1} of {2}".format(
-        num_hash, '' if num_hash == 1 else 's', name))
-
-    return version_lines
-
-
 def checksum(parser, args):
     # Make sure the user provided a package and not a URL
     if not valid_fully_qualified_module_name(args.package):
```
```diff
@@ -160,7 +75,7 @@ def checksum(parser, args):
     if not url_dict:
         tty.die("Could not find any versions for {0}".format(pkg.name))
 
-    version_lines = get_checksums(
+    version_lines = spack.util.web.get_checksums_for_versions(
         url_dict, pkg.name, keep_stage=args.keep_stage)
 
     print()
```
lib/spack/spack/cmd/create.py

```diff
@@ -30,7 +30,6 @@
 import llnl.util.tty as tty
 import spack
 import spack.cmd
-import spack.cmd.checksum
 import spack.util.web
 from llnl.util.filesystem import mkdirp
 from spack.repository import Repo
```
```diff
@@ -587,7 +586,7 @@ def get_versions(args, name):
         version = parse_version(args.url)
         url_dict = {version: args.url}
 
-        versions = spack.cmd.checksum.get_checksums(
+        versions = spack.util.web.get_checksums_for_versions(
             url_dict, name, first_stage_function=guesser,
             keep_stage=args.keep_stage)
```
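The `first_stage_function` hook is how `spack create` gets a look at the first archive it downloads (its `guesser` callable sniffs the build system). A minimal sketch of a caller-supplied hook; `first_stage_hook`, the package name, and the URL are stand-ins, not code from this commit:

```python
import spack.util.web


def first_stage_hook(stage, url):
    # Stand-in for spack create's 'guesser': called exactly once, with
    # the stage of the first downloaded archive, before checksumming.
    print("fetched {0} into {1}".format(url, stage.archive_file))


url_dict = {'1.0': 'https://example.com/bar-1.0.tar.gz'}
version_lines = spack.util.web.get_checksums_for_versions(
    url_dict, 'bar', first_stage_function=first_stage_hook,
    keep_stage=False)
```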
lib/spack/spack/error.py

```diff
@@ -113,16 +113,6 @@ def __init__(self, message):
         super(UnsupportedPlatformError, self).__init__(message)
 
 
-class NoNetworkConnectionError(SpackError):
-    """Raised when an operation needs an internet connection."""
-
-    def __init__(self, message, url):
-        super(NoNetworkConnectionError, self).__init__(
-            "No network connection: " + str(message),
-            "URL was: " + str(url))
-        self.url = url
-
-
 class SpecError(SpackError):
     """Superclass for all errors that occur while constructing specs."""
```
lib/spack/spack/fetch_strategy.py

```diff
@@ -54,13 +54,14 @@
 import spack
 import spack.error
 import spack.util.crypto as crypto
+import spack.util.pattern as pattern
 from spack.util.executable import *
 from spack.util.string import *
 from spack.version import Version, ver
 from spack.util.compression import decompressor_for, extension
 
-import spack.util.pattern as pattern
-
-"""List of all fetch strategies, created by FetchStrategy metaclass."""
+
+#: List of all fetch strategies, created by FetchStrategy metaclass.
 all_strategies = []
```
```diff
@@ -967,7 +968,7 @@ def from_list_url(pkg):
        the specified package's version."""
     if pkg.list_url:
         try:
-            versions = pkg.fetch_remote_versions()
+            versions = pkg.fetch_remote_package_versions()
             try:
                 url_from_list = versions[pkg.version]
                 digest = None
```
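A side note on the import hunk above: the `#:` form is the Sphinx autodoc convention for documenting a module-level variable, while a bare string literal placed before an assignment, as the old code had it, is just an expression that gets evaluated and discarded. A tiny illustration with a hypothetical variable:

```python
#: Number of times to retry a flaky download. Sphinx autodoc picks up
#: this comment as the variable's documentation.
download_retries = 3
```

(A string literal on the line after an assignment is also recognized by autodoc; one on the line before it documents nothing.)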
lib/spack/spack/package.py

```diff
@@ -1841,7 +1841,7 @@ def fetch_remote_versions(self):
         try:
             return spack.util.web.find_versions_of_archive(
                 self.all_urls, self.list_url, self.list_depth)
-        except spack.error.NoNetworkConnectionError as e:
+        except spack.util.web.NoNetworkConnectionError as e:
             tty.die("Package.fetch_versions couldn't connect to:", e.url,
                     e.message)
```
```diff
@@ -2064,15 +2064,6 @@ def __init__(self, version):
             "Please provide a url for this version in the package.py file.")
 
 
-class VersionFetchError(PackageError):
-    """Raised when a version URL cannot automatically be determined."""
-
-    def __init__(self, cls):
-        super(VersionFetchError, self).__init__(
-            "Cannot fetch versions for package %s " % cls.__name__ +
-            "because it does not define any URLs to fetch.")
-
-
 class NoURLError(PackageError):
     """Raised when someone tries to build a URL for a package with no URLs."""
```
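Any code that caught `spack.error.NoNetworkConnectionError` has to switch to the class's new home, as `fetch_remote_versions` does above. A sketch of the updated handling, assuming a hypothetical package object `pkg`:

```python
import llnl.util.tty as tty
import spack.util.web

try:
    versions = spack.util.web.find_versions_of_archive(
        pkg.all_urls, pkg.list_url, pkg.list_depth)
except spack.util.web.NoNetworkConnectionError as e:
    # The exception carries both the offending URL and the message.
    tty.die("Couldn't connect to:", e.url, e.message)
```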
lib/spack/spack/util/web.py

```diff
@@ -22,11 +22,14 @@
 # License along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 ##############################################################################
+from __future__ import print_function
+
 import re
 import os
 import ssl
 import sys
 import traceback
+import hashlib
 
 from six.moves.urllib.request import urlopen, Request
 from six.moves.urllib.error import URLError
```
```diff
@@ -50,8 +53,9 @@ class HTMLParseError(Exception):
 import spack.error
 from spack.util.compression import ALLOWED_ARCHIVE_TYPES
 
+
 # Timeout in seconds for web requests
-TIMEOUT = 10
+_timeout = 10
 
 
 class LinkParser(HTMLParser):
```
```diff
@@ -127,7 +131,7 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error):
         # if you ask for a tarball with Accept: text/html.
         req = Request(url)
         req.get_method = lambda: "HEAD"
-        resp = _urlopen(req, timeout=TIMEOUT, context=context)
+        resp = _urlopen(req, timeout=_timeout, context=context)
 
         if "Content-type" not in resp.headers:
             tty.debug("ignoring page " + url)
```
```diff
@@ -140,7 +144,7 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error):
 
         # Do the real GET request when we know it's just HTML.
         req.get_method = lambda: "GET"
-        response = _urlopen(req, timeout=TIMEOUT, context=context)
+        response = _urlopen(req, timeout=_timeout, context=context)
         response_url = response.geturl()
 
         # Read the page and and stick it in the map we'll return
```
```diff
@@ -199,7 +203,7 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error):
                 "own risk.")
 
         if raise_on_error:
-            raise spack.error.NoNetworkConnectionError(str(e), url)
+            raise NoNetworkConnectionError(str(e), url)
 
     except HTMLParseError as e:
         # This error indicates that Python's HTML parser sucks.
```
```diff
@@ -328,3 +332,105 @@ def find_versions_of_archive(archive_urls, list_url=None, list_depth=0):
             continue
 
     return versions
+
+
+def get_checksums_for_versions(
+        url_dict, name, first_stage_function=None, keep_stage=False):
+    """Fetches and checksums archives from URLs.
+
+    This function is called by both ``spack checksum`` and ``spack
+    create``.  The ``first_stage_function`` argument allows the caller to
+    inspect the first downloaded archive, e.g., to determine the build
+    system.
+
+    Args:
+        url_dict (dict): A dictionary of the form: version -> URL
+        name (str): The name of the package
+        first_stage_function (callable): function that takes a Stage and a URL;
+            this is run on the stage of the first URL downloaded
+        keep_stage (bool): whether to keep staging area when command completes
+
+    Returns:
+        (str): A multi-line string containing versions and corresponding hashes
+
+    """
+    sorted_versions = sorted(url_dict.keys(), reverse=True)
+
+    # Find length of longest string in the list for padding
+    max_len = max(len(str(v)) for v in sorted_versions)
+    num_ver = len(sorted_versions)
+
+    tty.msg("Found {0} version{1} of {2}:".format(
+            num_ver, '' if num_ver == 1 else 's', name),
+            "",
+            *spack.cmd.elide_list(
+                ["{0:{1}}  {2}".format(str(v), max_len, url_dict[v])
+                 for v in sorted_versions]))
+    print()
+
+    archives_to_fetch = tty.get_number(
+        "How many would you like to checksum?", default=1, abort='q')
+
+    if not archives_to_fetch:
+        tty.die("Aborted.")
+
+    versions = sorted_versions[:archives_to_fetch]
+    urls = [url_dict[v] for v in versions]
+
+    tty.msg("Downloading...")
+    version_hashes = []
+    i = 0
+    for url, version in zip(urls, versions):
+        try:
+            with spack.stage.Stage(url, keep=keep_stage) as stage:
+                # Fetch the archive
+                stage.fetch()
+                if i == 0 and first_stage_function:
+                    # Only run first_stage_function the first time,
+                    # no need to run it every time
+                    first_stage_function(stage, url)
+
+                # Checksum the archive and add it to the list
+                version_hashes.append((version, spack.util.crypto.checksum(
+                    hashlib.md5, stage.archive_file)))
+                i += 1
+        except spack.stage.FailedDownloadError:
+            tty.msg("Failed to fetch {0}".format(url))
+        except Exception as e:
+            tty.msg("Something failed on {0}, skipping.".format(url),
+                    "  ({0})".format(e))
+
+    if not version_hashes:
+        tty.die("Could not fetch any versions for {0}".format(name))
+
+    # Find length of longest string in the list for padding
+    max_len = max(len(str(v)) for v, h in version_hashes)
+
+    # Generate the version directives to put in a package.py
+    version_lines = "\n".join([
+        "    version('{0}', {1}'{2}')".format(
+            v, ' ' * (max_len - len(str(v))), h) for v, h in version_hashes
+    ])
+
+    num_hash = len(version_hashes)
+    tty.msg("Checksummed {0} version{1} of {2}".format(
+        num_hash, '' if num_hash == 1 else 's', name))
+
+    return version_lines
+
+
+class SpackWebError(spack.error.SpackError):
+    """Superclass for Spack web spidering errors."""
+
+
+class VersionFetchError(SpackWebError):
+    """Raised when we can't determine a URL to fetch a package."""
+
+
+class NoNetworkConnectionError(SpackWebError):
+    """Raised when an operation can't get an internet connection."""
+
+    def __init__(self, message, url):
+        super(NoNetworkConnectionError, self).__init__(
+            "No network connection: " + str(message),
+            "URL was: " + str(url))
+        self.url = url
```
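For reference, the exception classes added above all derive from `SpackWebError`, which in turn derives from `spack.error.SpackError`, so existing blanket `SpackError` handlers keep working; only code that named `spack.error.NoNetworkConnectionError` directly needs updating. A sketch of catching at the new superclass (the URL is hypothetical, and whether `spider` raises here depends on its `raise_on_error` path):

```python
import spack.util.web

try:
    pages, links = spack.util.web.spider('https://example.com/downloads')
except spack.util.web.SpackWebError as e:
    # Catches VersionFetchError and NoNetworkConnectionError alike.
    print("web spidering failed: {0}".format(e))
```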