Consolidate some web-spidering commands in spack.util.web

- move `spack.cmd.checksum.get_checksums` to
  `spack.util.web.get_checksums_for_versions`

- move `spack.error.NoNetworkConnectionError` to
  `spack.util.web.NoNetworkConnectionError`, since it is only used there.
Todd Gamblin 2017-09-23 15:17:42 -07:00
parent 2198a0e229
commit 94d85d842c
6 changed files with 118 additions and 116 deletions
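
For orientation, here is a minimal sketch of the consolidated entry point as
both commands call it after this change. The url_dict below is made up for
illustration, and the real function is interactive (it prompts through
tty.get_number before downloading anything):

    import spack.util.web

    # A version -> URL mapping; both commands build one of these before
    # checksumming. These URLs are invented for illustration.
    url_dict = {
        '1.2.1': 'https://example.com/pkg-1.2.1.tar.gz',
        '1.2.0': 'https://example.com/pkg-1.2.0.tar.gz',
    }

    # `spack checksum` style call:
    version_lines = spack.util.web.get_checksums_for_versions(
        url_dict, 'pkg', keep_stage=False)

    # `spack create` also passes a callable that may inspect the first
    # downloaded archive (see the create.py hunk below):
    version_lines = spack.util.web.get_checksums_for_versions(
        url_dict, 'pkg', first_stage_function=None, keep_stage=False)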

lib/spack/spack/cmd/checksum.py

@@ -25,13 +25,12 @@
 from __future__ import print_function
 import argparse
 import hashlib
 import llnl.util.tty as tty
 import spack
 import spack.cmd
 import spack.util.crypto
-from spack.stage import Stage, FailedDownloadError
+import spack.util.web
 from spack.util.naming import *
 from spack.version import *
@@ -52,90 +51,6 @@ def setup_parser(subparser):
         help='versions to generate checksums for')


-def get_checksums(url_dict, name, **kwargs):
-    """Fetches and checksums archives from URLs.
-
-    This function is called by both ``spack checksum`` and ``spack create``.
-    The ``first_stage_function`` kwarg allows ``spack create`` to determine
-    things like the build system of the archive.
-
-    Args:
-        url_dict (dict): A dictionary of the form: version -> URL
-        name (str): The name of the package
-        first_stage_function (callable): Function to run on first staging area
-        keep_stage (bool): Don't clean up staging area when command completes
-
-    Returns:
-        str: A multi-line string containing versions and corresponding hashes
-    """
-    first_stage_function = kwargs.get('first_stage_function', None)
-    keep_stage = kwargs.get('keep_stage', False)
-
-    sorted_versions = sorted(url_dict.keys(), reverse=True)
-
-    # Find length of longest string in the list for padding
-    max_len = max(len(str(v)) for v in sorted_versions)
-    num_ver = len(sorted_versions)
-
-    tty.msg("Found {0} version{1} of {2}:".format(
-            num_ver, '' if num_ver == 1 else 's', name),
-            "",
-            *spack.cmd.elide_list(
-                ["{0:{1}} {2}".format(str(v), max_len, url_dict[v])
-                 for v in sorted_versions]))
-    print()
-
-    archives_to_fetch = tty.get_number(
-        "How many would you like to checksum?", default=1, abort='q')
-
-    if not archives_to_fetch:
-        tty.die("Aborted.")
-
-    versions = sorted_versions[:archives_to_fetch]
-    urls = [url_dict[v] for v in versions]
-
-    tty.msg("Downloading...")
-    version_hashes = []
-    i = 0
-    for url, version in zip(urls, versions):
-        try:
-            with Stage(url, keep=keep_stage) as stage:
-                # Fetch the archive
-                stage.fetch()
-                if i == 0 and first_stage_function:
-                    # Only run first_stage_function the first time,
-                    # no need to run it every time
-                    first_stage_function(stage, url)
-
-                # Checksum the archive and add it to the list
-                version_hashes.append((version, spack.util.crypto.checksum(
-                    hashlib.md5, stage.archive_file)))
-            i += 1
-        except FailedDownloadError:
-            tty.msg("Failed to fetch {0}".format(url))
-        except Exception as e:
-            tty.msg("Something failed on {0}, skipping.".format(url),
-                    " ({0})".format(e))
-
-    if not version_hashes:
-        tty.die("Could not fetch any versions for {0}".format(name))
-
-    # Find length of longest string in the list for padding
-    max_len = max(len(str(v)) for v, h in version_hashes)
-
-    # Generate the version directives to put in a package.py
-    version_lines = "\n".join([
-        "    version('{0}', {1}'{2}')".format(
-            v, ' ' * (max_len - len(str(v))), h) for v, h in version_hashes
-    ])
-
-    num_hash = len(version_hashes)
-    tty.msg("Checksummed {0} version{1} of {2}".format(
-        num_hash, '' if num_hash == 1 else 's', name))
-
-    return version_lines


 def checksum(parser, args):
     # Make sure the user provided a package and not a URL
     if not valid_fully_qualified_module_name(args.package):
@@ -160,7 +75,7 @@ def checksum(parser, args):
     if not url_dict:
         tty.die("Could not find any versions for {0}".format(pkg.name))

-    version_lines = get_checksums(
+    version_lines = spack.util.web.get_checksums_for_versions(
         url_dict, pkg.name, keep_stage=args.keep_stage)

     print()
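
The hashing itself is delegated to spack.util.crypto.checksum, which this
diff does not show. A plausible minimal implementation, assuming it streams
the file through whatever hashlib constructor it is handed (the block_size
parameter and its default are my invention):

    import hashlib

    def checksum(hashlib_algo, filename, block_size=2 ** 20):
        # Stream the file through the given hash constructor, e.g.
        # hashlib.md5, and return the hex digest. A sketch in the spirit
        # of spack.util.crypto.checksum; the real signature may differ.
        hasher = hashlib_algo()
        with open(filename, 'rb') as f:
            while True:
                data = f.read(block_size)
                if not data:
                    break
                hasher.update(data)
        return hasher.hexdigest()

Reading in fixed-size blocks keeps memory use flat even for very large
tarballs, and taking a constructor rather than a digest object means each
file gets a fresh hasher.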

lib/spack/spack/cmd/create.py

@@ -30,7 +30,6 @@
 import llnl.util.tty as tty
 import spack
 import spack.cmd
-import spack.cmd.checksum
 import spack.util.web
 from llnl.util.filesystem import mkdirp
 from spack.repository import Repo
@@ -587,7 +586,7 @@ def get_versions(args, name):
         version = parse_version(args.url)
         url_dict = {version: args.url}

-    versions = spack.cmd.checksum.get_checksums(
+    versions = spack.util.web.get_checksums_for_versions(
         url_dict, name, first_stage_function=guesser,
         keep_stage=args.keep_stage)
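
The guesser passed as first_stage_function is defined elsewhere in create.py
and is not part of this diff. One plausible shape for such a callable,
assuming it receives a Stage whose archive_file points at the downloaded
(but not yet expanded) tarball:

    import tarfile

    def guesser(stage, url):
        # Hypothetical first_stage_function: peek inside the first
        # downloaded archive and guess the build system from well-known
        # file names.
        with tarfile.open(stage.archive_file) as tar:
            names = tar.getnames()
        if any(n.endswith('CMakeLists.txt') for n in names):
            print('{0} looks like a CMake package'.format(url))
        elif any(n.endswith('configure') for n in names):
            print('{0} looks like an Autotools package'.format(url))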

lib/spack/spack/error.py

@@ -113,16 +113,6 @@ def __init__(self, message):
         super(UnsupportedPlatformError, self).__init__(message)


-class NoNetworkConnectionError(SpackError):
-    """Raised when an operation needs an internet connection."""
-
-    def __init__(self, message, url):
-        super(NoNetworkConnectionError, self).__init__(
-            "No network connection: " + str(message),
-            "URL was: " + str(url))
-        self.url = url


 class SpecError(SpackError):
     """Superclass for all errors that occur while constructing specs."""

lib/spack/spack/fetch_strategy.py

@@ -54,13 +54,14 @@
 import spack
 import spack.error
 import spack.util.crypto as crypto
+import spack.util.pattern as pattern
 from spack.util.executable import *
 from spack.util.string import *
 from spack.version import Version, ver
 from spack.util.compression import decompressor_for, extension
-import spack.util.pattern as pattern

-"""List of all fetch strategies, created by FetchStrategy metaclass."""
+#: List of all fetch strategies, created by FetchStrategy metaclass.
 all_strategies = []
@@ -967,7 +968,7 @@ def from_list_url(pkg):
     the specified package's version."""
     if pkg.list_url:
         try:
-            versions = pkg.fetch_remote_versions()
+            versions = pkg.fetch_remote_package_versions()
             try:
                 url_from_list = versions[pkg.version]
                 digest = None

lib/spack/spack/package.py

@@ -1841,7 +1841,7 @@ def fetch_remote_versions(self):
         try:
             return spack.util.web.find_versions_of_archive(
                 self.all_urls, self.list_url, self.list_depth)
-        except spack.error.NoNetworkConnectionError as e:
+        except spack.util.web.NoNetworkConnectionError as e:
             tty.die("Package.fetch_versions couldn't connect to:", e.url,
                     e.message)
@@ -2064,15 +2064,6 @@ def __init__(self, version):
             "Please provide a url for this version in the package.py file.")


-class VersionFetchError(PackageError):
-    """Raised when a version URL cannot automatically be determined."""
-
-    def __init__(self, cls):
-        super(VersionFetchError, self).__init__(
-            "Cannot fetch versions for package %s " % cls.__name__ +
-            "because it does not define any URLs to fetch.")


 class NoURLError(PackageError):
     """Raised when someone tries to build a URL for a package with no URLs."""

lib/spack/spack/util/web.py

@@ -22,11 +22,14 @@
 # License along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 ##############################################################################
+from __future__ import print_function
+
 import re
 import os
 import ssl
 import sys
 import traceback
+import hashlib

 from six.moves.urllib.request import urlopen, Request
 from six.moves.urllib.error import URLError
@@ -50,8 +53,9 @@ class HTMLParseError(Exception):
 import spack.error
 from spack.util.compression import ALLOWED_ARCHIVE_TYPES

 # Timeout in seconds for web requests
-TIMEOUT = 10
+_timeout = 10
class LinkParser(HTMLParser):
@@ -127,7 +131,7 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error):
         # if you ask for a tarball with Accept: text/html.
         req = Request(url)
         req.get_method = lambda: "HEAD"
-        resp = _urlopen(req, timeout=TIMEOUT, context=context)
+        resp = _urlopen(req, timeout=_timeout, context=context)

         if "Content-type" not in resp.headers:
             tty.debug("ignoring page " + url)
@@ -140,7 +144,7 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error):
         # Do the real GET request when we know it's just HTML.
         req.get_method = lambda: "GET"
-        response = _urlopen(req, timeout=TIMEOUT, context=context)
+        response = _urlopen(req, timeout=_timeout, context=context)
         response_url = response.geturl()

         # Read the page and stick it in the map we'll return
@@ -199,7 +203,7 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error):
                      "own risk.")

             if raise_on_error:
-                raise spack.error.NoNetworkConnectionError(str(e), url)
+                raise NoNetworkConnectionError(str(e), url)

     except HTMLParseError as e:
         # This error indicates that Python's HTML parser sucks.
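
The two _urlopen calls above are a HEAD-then-GET dance: a cheap HEAD request
filters out non-HTML URLs first, so the spider never downloads a tarball
just to inspect its Content-type, and only then re-issues the request as a
real GET. A standalone sketch of the pattern, minus spack's SSL context and
error handling (fetch_if_html is my name for it):

    from six.moves.urllib.request import urlopen, Request

    def fetch_if_html(url, timeout=10):
        # Ask for headers only; servers that honor HEAD send back
        # Content-type without the body.
        req = Request(url)
        req.get_method = lambda: "HEAD"
        resp = urlopen(req, timeout=timeout)
        if "text/html" not in resp.headers.get("Content-type", ""):
            return None
        # Re-issue as a real GET now that we know it's an HTML page.
        req.get_method = lambda: "GET"
        return urlopen(req, timeout=timeout).read()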
@@ -328,3 +332,105 @@ def find_versions_of_archive(archive_urls, list_url=None, list_depth=0):
             continue

     return versions
+
+
+def get_checksums_for_versions(
+        url_dict, name, first_stage_function=None, keep_stage=False):
+    """Fetches and checksums archives from URLs.
+
+    This function is called by both ``spack checksum`` and ``spack
+    create``.  The ``first_stage_function`` argument allows the caller to
+    inspect the first downloaded archive, e.g., to determine the build
+    system.
+
+    Args:
+        url_dict (dict): A dictionary of the form: version -> URL
+        name (str): The name of the package
+        first_stage_function (callable): function that takes a Stage and a
+            URL; this is run on the stage of the first URL downloaded
+        keep_stage (bool): whether to keep staging area when command completes
+
+    Returns:
+        (str): A multi-line string containing versions and corresponding hashes
+    """
+    sorted_versions = sorted(url_dict.keys(), reverse=True)
+
+    # Find length of longest string in the list for padding
+    max_len = max(len(str(v)) for v in sorted_versions)
+    num_ver = len(sorted_versions)
+
+    tty.msg("Found {0} version{1} of {2}:".format(
+            num_ver, '' if num_ver == 1 else 's', name),
+            "",
+            *spack.cmd.elide_list(
+                ["{0:{1}} {2}".format(str(v), max_len, url_dict[v])
+                 for v in sorted_versions]))
+    print()
+
+    archives_to_fetch = tty.get_number(
+        "How many would you like to checksum?", default=1, abort='q')
+
+    if not archives_to_fetch:
+        tty.die("Aborted.")
+
+    versions = sorted_versions[:archives_to_fetch]
+    urls = [url_dict[v] for v in versions]
+
+    tty.msg("Downloading...")
+    version_hashes = []
+    i = 0
+    for url, version in zip(urls, versions):
+        try:
+            with spack.stage.Stage(url, keep=keep_stage) as stage:
+                # Fetch the archive
+                stage.fetch()
+                if i == 0 and first_stage_function:
+                    # Only run first_stage_function the first time,
+                    # no need to run it every time
+                    first_stage_function(stage, url)
+
+                # Checksum the archive and add it to the list
+                version_hashes.append((version, spack.util.crypto.checksum(
+                    hashlib.md5, stage.archive_file)))
+            i += 1
+        except spack.stage.FailedDownloadError:
+            tty.msg("Failed to fetch {0}".format(url))
+        except Exception as e:
+            tty.msg("Something failed on {0}, skipping.".format(url),
+                    " ({0})".format(e))
+
+    if not version_hashes:
+        tty.die("Could not fetch any versions for {0}".format(name))
+
+    # Find length of longest string in the list for padding
+    max_len = max(len(str(v)) for v, h in version_hashes)
+
+    # Generate the version directives to put in a package.py
+    version_lines = "\n".join([
+        "    version('{0}', {1}'{2}')".format(
+            v, ' ' * (max_len - len(str(v))), h) for v, h in version_hashes
+    ])
+
+    num_hash = len(version_hashes)
+    tty.msg("Checksummed {0} version{1} of {2}".format(
+        num_hash, '' if num_hash == 1 else 's', name))
+
+    return version_lines
+
+
+class SpackWebError(spack.error.SpackError):
+    """Superclass for Spack web spidering errors."""
+
+
+class VersionFetchError(SpackWebError):
+    """Raised when we can't determine a URL to fetch a package."""
+
+
+class NoNetworkConnectionError(SpackWebError):
+    """Raised when an operation can't get an internet connection."""
+
+    def __init__(self, message, url):
+        super(NoNetworkConnectionError, self).__init__(
+            "No network connection: " + str(message),
+            "URL was: " + str(url))
+        self.url = url
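
Call sites outside web.py now catch the exception from its new home, as the
package.py hunk above shows. A minimal sketch of the new usage (URLs
invented for illustration):

    import spack.util.web

    try:
        versions = spack.util.web.find_versions_of_archive(
            ['https://example.com/pkg-1.2.1.tar.gz'],
            list_url='https://example.com/downloads', list_depth=0)
    except spack.util.web.NoNetworkConnectionError as e:
        # The relocated exception still carries the offending URL.
        print('no network connection: {0}'.format(e.url))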