Consolidate some web-spidering commands in spack.util.web

- move `spack.cmd.checksum.get_checksums` to
  `spack.util.web.get_checksums_for_versions`

- move `spack.error.NoNetworkConnectionError` to
  `spack.util.web.NoNetworkConnectionError`, since it is only used there.
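
For call sites, the move is an import swap plus a rename; a minimal before/after sketch (assuming a `url_dict` mapping versions to download URLs and a package `name`, as in `spack checksum`):

    # Before: the helper lived in the checksum command module.
    import spack.cmd.checksum
    version_lines = spack.cmd.checksum.get_checksums(
        url_dict, name, keep_stage=False)

    # After: the helper sits with the other web utilities.
    import spack.util.web
    version_lines = spack.util.web.get_checksums_for_versions(
        url_dict, name, keep_stage=False)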
Todd Gamblin committed on 2017-09-23 15:17:42 -07:00
commit 94d85d842c (parent 2198a0e229)
6 changed files with 118 additions and 116 deletions

lib/spack/spack/cmd/checksum.py

@@ -25,13 +25,12 @@
 from __future__ import print_function
 
 import argparse
-import hashlib
 
 import llnl.util.tty as tty
 
 import spack
 import spack.cmd
 import spack.util.crypto
-from spack.stage import Stage, FailedDownloadError
+import spack.util.web
 from spack.util.naming import *
 from spack.version import *
@@ -52,90 +51,6 @@ def setup_parser(subparser):
         help='versions to generate checksums for')
 
 
-def get_checksums(url_dict, name, **kwargs):
-    """Fetches and checksums archives from URLs.
-
-    This function is called by both ``spack checksum`` and ``spack create``.
-    The ``first_stage_function`` kwarg allows ``spack create`` to determine
-    things like the build system of the archive.
-
-    Args:
-        url_dict (dict): A dictionary of the form: version -> URL
-        name (str): The name of the package
-        first_stage_function (callable): Function to run on first staging area
-        keep_stage (bool): Don't clean up staging area when command completes
-
-    Returns:
-        str: A multi-line string containing versions and corresponding hashes
-    """
-    first_stage_function = kwargs.get('first_stage_function', None)
-    keep_stage = kwargs.get('keep_stage', False)
-
-    sorted_versions = sorted(url_dict.keys(), reverse=True)
-
-    # Find length of longest string in the list for padding
-    max_len = max(len(str(v)) for v in sorted_versions)
-    num_ver = len(sorted_versions)
-
-    tty.msg("Found {0} version{1} of {2}:".format(
-        num_ver, '' if num_ver == 1 else 's', name),
-        "",
-        *spack.cmd.elide_list(
-            ["{0:{1}}  {2}".format(str(v), max_len, url_dict[v])
-             for v in sorted_versions]))
-    print()
-
-    archives_to_fetch = tty.get_number(
-        "How many would you like to checksum?", default=1, abort='q')
-
-    if not archives_to_fetch:
-        tty.die("Aborted.")
-
-    versions = sorted_versions[:archives_to_fetch]
-    urls = [url_dict[v] for v in versions]
-
-    tty.msg("Downloading...")
-    version_hashes = []
-    i = 0
-    for url, version in zip(urls, versions):
-        try:
-            with Stage(url, keep=keep_stage) as stage:
-                # Fetch the archive
-                stage.fetch()
-                if i == 0 and first_stage_function:
-                    # Only run first_stage_function the first time,
-                    # no need to run it every time
-                    first_stage_function(stage, url)
-
-                # Checksum the archive and add it to the list
-                version_hashes.append((version, spack.util.crypto.checksum(
-                    hashlib.md5, stage.archive_file)))
-                i += 1
-        except FailedDownloadError:
-            tty.msg("Failed to fetch {0}".format(url))
-        except Exception as e:
-            tty.msg("Something failed on {0}, skipping.".format(url),
-                    "  ({0})".format(e))
-
-    if not version_hashes:
-        tty.die("Could not fetch any versions for {0}".format(name))
-
-    # Find length of longest string in the list for padding
-    max_len = max(len(str(v)) for v, h in version_hashes)
-
-    # Generate the version directives to put in a package.py
-    version_lines = "\n".join([
-        "    version('{0}', {1}'{2}')".format(
-            v, ' ' * (max_len - len(str(v))), h) for v, h in version_hashes
-    ])
-
-    num_hash = len(version_hashes)
-    tty.msg("Checksummed {0} version{1} of {2}".format(
-        num_hash, '' if num_hash == 1 else 's', name))
-
-    return version_lines
-
-
 def checksum(parser, args):
     # Make sure the user provided a package and not a URL
     if not valid_fully_qualified_module_name(args.package):
@@ -160,7 +75,7 @@ def checksum(parser, args):
     if not url_dict:
         tty.die("Could not find any versions for {0}".format(pkg.name))
 
-    version_lines = get_checksums(
+    version_lines = spack.util.web.get_checksums_for_versions(
        url_dict, pkg.name, keep_stage=args.keep_stage)
 
     print()

lib/spack/spack/cmd/create.py

@@ -30,7 +30,6 @@
 import llnl.util.tty as tty
 import spack
 import spack.cmd
-import spack.cmd.checksum
 import spack.util.web
 from llnl.util.filesystem import mkdirp
 from spack.repository import Repo
@@ -587,7 +586,7 @@ def get_versions(args, name):
             version = parse_version(args.url)
             url_dict = {version: args.url}
 
-        versions = spack.cmd.checksum.get_checksums(
+        versions = spack.util.web.get_checksums_for_versions(
             url_dict, name, first_stage_function=guesser,
             keep_stage=args.keep_stage)

lib/spack/spack/error.py

@@ -113,16 +113,6 @@ def __init__(self, message):
         super(UnsupportedPlatformError, self).__init__(message)
 
 
-class NoNetworkConnectionError(SpackError):
-    """Raised when an operation needs an internet connection."""
-
-    def __init__(self, message, url):
-        super(NoNetworkConnectionError, self).__init__(
-            "No network connection: " + str(message),
-            "URL was: " + str(url))
-        self.url = url
-
-
 class SpecError(SpackError):
     """Superclass for all errors that occur while constructing specs."""

lib/spack/spack/fetch_strategy.py

@@ -54,13 +54,14 @@
 import spack
 import spack.error
 import spack.util.crypto as crypto
+import spack.util.pattern as pattern
 from spack.util.executable import *
 from spack.util.string import *
 from spack.version import Version, ver
 from spack.util.compression import decompressor_for, extension
-import spack.util.pattern as pattern
 
-"""List of all fetch strategies, created by FetchStrategy metaclass."""
+
+#: List of all fetch strategies, created by FetchStrategy metaclass.
 all_strategies = []
@@ -967,7 +968,7 @@ def from_list_url(pkg):
     the specified package's version."""
     if pkg.list_url:
         try:
-            versions = pkg.fetch_remote_versions()
+            versions = pkg.fetch_remote_package_versions()
             try:
                 url_from_list = versions[pkg.version]
                 digest = None

lib/spack/spack/package.py

@@ -1841,7 +1841,7 @@ def fetch_remote_versions(self):
         try:
             return spack.util.web.find_versions_of_archive(
                 self.all_urls, self.list_url, self.list_depth)
-        except spack.error.NoNetworkConnectionError as e:
+        except spack.util.web.NoNetworkConnectionError as e:
             tty.die("Package.fetch_versions couldn't connect to:", e.url,
                     e.message)
@@ -2064,15 +2064,6 @@ def __init__(self, version):
             "Please provide a url for this version in the package.py file.")
 
 
-class VersionFetchError(PackageError):
-    """Raised when a version URL cannot automatically be determined."""
-
-    def __init__(self, cls):
-        super(VersionFetchError, self).__init__(
-            "Cannot fetch versions for package %s " % cls.__name__ +
-            "because it does not define any URLs to fetch.")
-
-
 class NoURLError(PackageError):
     """Raised when someone tries to build a URL for a package with no URLs."""

lib/spack/spack/util/web.py

@@ -22,11 +22,14 @@
 # License along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 ##############################################################################
+from __future__ import print_function
+
 import re
 import os
 import ssl
 import sys
 import traceback
+import hashlib
 
 from six.moves.urllib.request import urlopen, Request
 from six.moves.urllib.error import URLError
@@ -50,8 +53,9 @@ class HTMLParseError(Exception):
 import spack.error
 from spack.util.compression import ALLOWED_ARCHIVE_TYPES
 
 # Timeout in seconds for web requests
-TIMEOUT = 10
+_timeout = 10
 
 
 class LinkParser(HTMLParser):
@@ -127,7 +131,7 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error):
         # if you ask for a tarball with Accept: text/html.
         req = Request(url)
         req.get_method = lambda: "HEAD"
-        resp = _urlopen(req, timeout=TIMEOUT, context=context)
+        resp = _urlopen(req, timeout=_timeout, context=context)
 
         if "Content-type" not in resp.headers:
             tty.debug("ignoring page " + url)
@@ -140,7 +144,7 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error):
         # Do the real GET request when we know it's just HTML.
         req.get_method = lambda: "GET"
-        response = _urlopen(req, timeout=TIMEOUT, context=context)
+        response = _urlopen(req, timeout=_timeout, context=context)
         response_url = response.geturl()
 
         # Read the page and and stick it in the map we'll return
@@ -199,7 +203,7 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error):
                 "own risk.")
 
         if raise_on_error:
-            raise spack.error.NoNetworkConnectionError(str(e), url)
+            raise NoNetworkConnectionError(str(e), url)
 
     except HTMLParseError as e:
         # This error indicates that Python's HTML parser sucks.
@@ -328,3 +332,105 @@ def find_versions_of_archive(archive_urls, list_url=None, list_depth=0):
             continue
 
     return versions
+
+
+def get_checksums_for_versions(
+        url_dict, name, first_stage_function=None, keep_stage=False):
+    """Fetches and checksums archives from URLs.
+
+    This function is called by both ``spack checksum`` and ``spack
+    create``.  The ``first_stage_function`` argument allows the caller to
+    inspect the first downloaded archive, e.g., to determine the build
+    system.
+
+    Args:
+        url_dict (dict): A dictionary of the form: version -> URL
+        name (str): The name of the package
+        first_stage_function (callable): function that takes a Stage and a URL;
+            this is run on the stage of the first URL downloaded
+        keep_stage (bool): whether to keep staging area when command completes
+
+    Returns:
+        (str): A multi-line string containing versions and corresponding hashes
+    """
+    sorted_versions = sorted(url_dict.keys(), reverse=True)
+
+    # Find length of longest string in the list for padding
+    max_len = max(len(str(v)) for v in sorted_versions)
+    num_ver = len(sorted_versions)
+
+    tty.msg("Found {0} version{1} of {2}:".format(
+        num_ver, '' if num_ver == 1 else 's', name),
+        "",
+        *spack.cmd.elide_list(
+            ["{0:{1}}  {2}".format(str(v), max_len, url_dict[v])
+             for v in sorted_versions]))
+    print()
+
+    archives_to_fetch = tty.get_number(
+        "How many would you like to checksum?", default=1, abort='q')
+
+    if not archives_to_fetch:
+        tty.die("Aborted.")
+
+    versions = sorted_versions[:archives_to_fetch]
+    urls = [url_dict[v] for v in versions]
+
+    tty.msg("Downloading...")
+    version_hashes = []
+    i = 0
+    for url, version in zip(urls, versions):
+        try:
+            with spack.stage.Stage(url, keep=keep_stage) as stage:
+                # Fetch the archive
+                stage.fetch()
+                if i == 0 and first_stage_function:
+                    # Only run first_stage_function the first time,
+                    # no need to run it every time
+                    first_stage_function(stage, url)
+
+                # Checksum the archive and add it to the list
+                version_hashes.append((version, spack.util.crypto.checksum(
+                    hashlib.md5, stage.archive_file)))
+                i += 1
+        except spack.stage.FailedDownloadError:
+            tty.msg("Failed to fetch {0}".format(url))
+        except Exception as e:
+            tty.msg("Something failed on {0}, skipping.".format(url),
+                    "  ({0})".format(e))
+
+    if not version_hashes:
+        tty.die("Could not fetch any versions for {0}".format(name))
+
+    # Find length of longest string in the list for padding
+    max_len = max(len(str(v)) for v, h in version_hashes)
+
+    # Generate the version directives to put in a package.py
+    version_lines = "\n".join([
+        "    version('{0}', {1}'{2}')".format(
+            v, ' ' * (max_len - len(str(v))), h) for v, h in version_hashes
+    ])
+
+    num_hash = len(version_hashes)
+    tty.msg("Checksummed {0} version{1} of {2}".format(
+        num_hash, '' if num_hash == 1 else 's', name))
+
+    return version_lines
+
+
+class SpackWebError(spack.error.SpackError):
+    """Superclass for Spack web spidering errors."""
+
+
+class VersionFetchError(SpackWebError):
+    """Raised when we can't determine a URL to fetch a package."""
+
+
+class NoNetworkConnectionError(SpackWebError):
+    """Raised when an operation can't get an internet connection."""
+
+    def __init__(self, message, url):
+        super(NoNetworkConnectionError, self).__init__(
+            "No network connection: " + str(message),
+            "URL was: " + str(url))
+        self.url = url
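
Taken together, a rough usage sketch of the relocated API (the package name and URLs below are hypothetical; the helper checksums with md5 as of this commit):

    import spack.util.web

    url_dict = {
        '1.1': 'https://example.com/foo-1.1.tar.gz',  # hypothetical URLs
        '1.0': 'https://example.com/foo-1.0.tar.gz',
    }

    try:
        # Interactively fetches the selected archives, md5-checksums them,
        # and returns ready-to-paste version() directives for a package.py.
        version_lines = spack.util.web.get_checksums_for_versions(
            url_dict, 'foo', keep_stage=False)
        print(version_lines)
    except spack.util.web.NoNetworkConnectionError as e:
        # The exception now lives next to the spidering code that raises it.
        print('No network connection; URL was', e.url)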