fetching: S3 upload and download (#11117)

This extends Spack so that it can fetch sources and binaries from, push sources and binaries to, and index the contents of mirrors hosted in an S3 bucket.

High level to-do list:

- [x] Extend mirrors configuration to add support for `file://` and `s3://` URLs (see the example config after this list).
- [x] Ensure all fetching, pushing, and indexing operations work for `file://` URLs.
- [x] Implement S3 source fetching
- [x] Implement S3 binary mirror indexing
- [x] Implement S3 binary package fetching
- [x] Implement S3 source pushing
- [x] Implement S3 binary package pushing
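With this in place, a `mirrors.yaml` can mix local and S3 mirrors. A minimal example (the bucket name and paths are made up):

```yaml
mirrors:
  local_filesystem: file:///home/me/spack-mirror
  remote_s3: s3://my-spack-bucket/mirror
```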

Important details:

* refactor URL handling so that S3 URLs and mirror URLs are handled more gracefully.
  - `parse()` now accepts already-parsed URL objects; an equivalent object is returned
    with any extra S3-related attributes intact.  Objects created with urllib can also
    be passed, and the additional S3 handling logic is still applied (see the sketch below).
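A minimal sketch of the idempotent-`parse()` idea, using plain `urllib` rather than the actual Spack helper:

```python
from urllib.parse import ParseResult, urlparse

def parse(url, scheme="file"):
    # Accept either a URL string or an already-parsed result; parsing an
    # already-parsed object is a no-op that preserves all attributes.
    if isinstance(url, ParseResult):
        return url
    return urlparse(url, scheme=scheme)

parsed = parse("s3://my-bucket/build_cache")  # bucket name is hypothetical
assert parse(parsed) is parsed                # safe to parse twice
assert parse("/tmp/mirror").scheme == "file"  # scheme defaults to file
```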

* update mirror schema/parsing: a mirror can now have separate fetch/push URLs (see the example after this list)
* implement an S3 fetch strategy and several supporting utility changes
* provide more feature-complete S3 fetching
* update the `buildcache create` command to support S3
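Under the updated schema, a mirror entry can be either a plain URL string or a `fetch`/`push` pair, for example (URLs are hypothetical):

```yaml
mirrors:
  simple: s3://my-spack-bucket/mirror
  split:
    fetch: https://mirror.example.com/spack
    push: s3://my-spack-bucket/mirror
```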

* Move the core logic for reading data from S3 out of the S3 fetch strategy and into
  the S3 URL handler.  The S3 fetch strategy now calls into `read_from_url()`.  Since
  `read_from_url()` can handle S3 URLs, the S3 fetch strategy is redundant.  It's not
  clear whether the ideal design is to have S3 fetching functionality in a fetch
  strategy, implemented directly in `read_from_url()`, or both (a usage sketch follows below).
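Judging from the call sites in this diff, `read_from_url()` returns a `(url, headers, stream)` triple for any supported scheme, and callers decode the byte stream themselves. Roughly (the bucket and path are made up):

```python
import codecs

import spack.util.web as web_util

# The same call works for http(s)://, file://, and s3:// URLs after this change.
url, headers, stream = web_util.read_from_url("s3://my-bucket/build_cache/index.html")
text = codecs.getreader("utf-8")(stream).read()
```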

* expanded what can be passed to `spack buildcache` via the `-d` flag: in addition
  to a directory on the local filesystem, the name of a configured mirror or a push
  URL can be passed directly (examples below).
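For example (the mirror name, paths, and bucket below are made up), all of the following now resolve to a push location:

```console
$ spack buildcache create -d /path/to/local/mirror <spec>        # local directory
$ spack buildcache create -d my-mirror <spec>                    # name of a configured mirror
$ spack buildcache create -d s3://my-spack-bucket/mirror <spec>  # push URL, given directly
$ spack buildcache update-index -d s3://my-spack-bucket/mirror   # regenerate index.html
```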
Authored by Omar Padron on 2019-10-22 03:32:04 -04:00, committed by Todd Gamblin
parent 6cb972a9d2
commit fd58c98b0e
21 changed files with 1411 additions and 280 deletions

View file

@@ -1,3 +1,8 @@
+# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
+# Spack Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
 generate ci jobs:
   script:
     - "./bin/generate-gitlab-ci-yml.sh"

View file

@@ -10,4 +10,4 @@ set -x
 SPACK_BIN_DIR="${CI_PROJECT_DIR}/bin"
 export PATH="${SPACK_BIN_DIR}:${PATH}"
-spack upload-s3 index
+spack buildcache update-index -d "$MIRROR_URL"

View file

@ -3,6 +3,7 @@
# #
# SPDX-License-Identifier: (Apache-2.0 OR MIT) # SPDX-License-Identifier: (Apache-2.0 OR MIT)
import codecs
import os import os
import re import re
import tarfile import tarfile
@ -23,14 +24,32 @@
import spack.util.gpg as gpg_util import spack.util.gpg as gpg_util
import spack.relocate as relocate import spack.relocate as relocate
import spack.util.spack_yaml as syaml import spack.util.spack_yaml as syaml
import spack.mirror
import spack.util.url as url_util
import spack.util.web as web_util
from spack.spec import Spec from spack.spec import Spec
from spack.stage import Stage from spack.stage import Stage
from spack.util.gpg import Gpg from spack.util.gpg import Gpg
from spack.util.web import spider, read_from_url
from spack.util.executable import ProcessError from spack.util.executable import ProcessError
_build_cache_relative_path = 'build_cache' _build_cache_relative_path = 'build_cache'
BUILD_CACHE_INDEX_TEMPLATE = '''
<html>
<head>
<title>{title}</title>
</head>
<body>
<ul>
{path_list}
</ul>
</body>
</html>
'''
BUILD_CACHE_INDEX_ENTRY_TEMPLATE = ' <li><a href="{path}">{path}</a></li>'
class NoOverwriteException(Exception): class NoOverwriteException(Exception):
""" """
@ -101,7 +120,7 @@ def build_cache_relative_path():
return _build_cache_relative_path return _build_cache_relative_path
def build_cache_directory(prefix): def build_cache_prefix(prefix):
return os.path.join(prefix, build_cache_relative_path()) return os.path.join(prefix, build_cache_relative_path())
@ -246,29 +265,36 @@ def sign_tarball(key, force, specfile_path):
Gpg.sign(key, specfile_path, '%s.asc' % specfile_path) Gpg.sign(key, specfile_path, '%s.asc' % specfile_path)
def _generate_html_index(path_list, output_path): def generate_package_index(cache_prefix):
f = open(output_path, 'w') """Create the build cache index page.
header = """<html>\n
<head>\n</head>\n
<list>\n"""
footer = "</list>\n</html>\n"
f.write(header)
for path in path_list:
rel = os.path.basename(path)
f.write('<li><a href="%s"> %s</a>\n' % (rel, rel))
f.write(footer)
f.close()
Creates (or replaces) the "index.html" page at the location given in
cache_prefix. This page contains a link for each binary package (*.yaml)
and signing key (*.key) under cache_prefix.
"""
tmpdir = tempfile.mkdtemp()
try:
index_html_path = os.path.join(tmpdir, 'index.html')
file_list = (
entry
for entry in web_util.list_url(cache_prefix)
if (entry.endswith('.yaml')
or entry.endswith('.key')))
def generate_package_index(build_cache_dir): with open(index_html_path, 'w') as f:
yaml_list = os.listdir(build_cache_dir) f.write(BUILD_CACHE_INDEX_TEMPLATE.format(
path_list = [os.path.join(build_cache_dir, l) for l in yaml_list] title='Spack Package Index',
path_list='\n'.join(
BUILD_CACHE_INDEX_ENTRY_TEMPLATE.format(path=path)
for path in file_list)))
index_html_path_tmp = os.path.join(build_cache_dir, 'index.html.tmp') web_util.push_to_url(
index_html_path = os.path.join(build_cache_dir, 'index.html') index_html_path,
url_util.join(cache_prefix, 'index.html'),
_generate_html_index(path_list, index_html_path_tmp) keep_original=False,
shutil.move(index_html_path_tmp, index_html_path) extra_args={'ContentType': 'text/html'})
finally:
shutil.rmtree(tmpdir)
def build_tarball(spec, outdir, force=False, rel=False, unsigned=False, def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
@ -281,33 +307,41 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
raise ValueError('spec must be concrete to build tarball') raise ValueError('spec must be concrete to build tarball')
# set up some paths # set up some paths
build_cache_dir = build_cache_directory(outdir) tmpdir = tempfile.mkdtemp()
cache_prefix = build_cache_prefix(tmpdir)
tarfile_name = tarball_name(spec, '.tar.gz') tarfile_name = tarball_name(spec, '.tar.gz')
tarfile_dir = os.path.join(build_cache_dir, tarfile_dir = os.path.join(cache_prefix, tarball_directory_name(spec))
tarball_directory_name(spec))
tarfile_path = os.path.join(tarfile_dir, tarfile_name) tarfile_path = os.path.join(tarfile_dir, tarfile_name)
mkdirp(tarfile_dir)
spackfile_path = os.path.join( spackfile_path = os.path.join(
build_cache_dir, tarball_path_name(spec, '.spack')) cache_prefix, tarball_path_name(spec, '.spack'))
if os.path.exists(spackfile_path):
remote_spackfile_path = url_util.join(
outdir, os.path.relpath(spackfile_path, tmpdir))
mkdirp(tarfile_dir)
if web_util.url_exists(remote_spackfile_path):
if force: if force:
os.remove(spackfile_path) web_util.remove_url(remote_spackfile_path)
else: else:
raise NoOverwriteException(str(spackfile_path)) raise NoOverwriteException(url_util.format(remote_spackfile_path))
# need to copy the spec file so the build cache can be downloaded # need to copy the spec file so the build cache can be downloaded
# without concretizing with the current spack packages # without concretizing with the current spack packages
# and preferences # and preferences
spec_file = os.path.join(spec.prefix, ".spack", "spec.yaml") spec_file = os.path.join(spec.prefix, ".spack", "spec.yaml")
specfile_name = tarball_name(spec, '.spec.yaml') specfile_name = tarball_name(spec, '.spec.yaml')
specfile_path = os.path.realpath( specfile_path = os.path.realpath(
os.path.join(build_cache_dir, specfile_name)) os.path.join(cache_prefix, specfile_name))
if os.path.exists(specfile_path): remote_specfile_path = url_util.join(
outdir, os.path.relpath(specfile_path, os.path.realpath(tmpdir)))
if web_util.url_exists(remote_specfile_path):
if force: if force:
os.remove(specfile_path) web_util.remove_url(remote_specfile_path)
else: else:
raise NoOverwriteException(str(specfile_path)) raise NoOverwriteException(url_util.format(remote_specfile_path))
# make a copy of the install directory to work with # make a copy of the install directory to work with
workdir = os.path.join(tempfile.mkdtemp(), os.path.basename(spec.prefix)) workdir = os.path.join(tempfile.mkdtemp(), os.path.basename(spec.prefix))
@ -324,6 +358,7 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
except Exception as e: except Exception as e:
shutil.rmtree(workdir) shutil.rmtree(workdir)
shutil.rmtree(tarfile_dir) shutil.rmtree(tarfile_dir)
shutil.rmtree(tmpdir)
tty.die(e) tty.die(e)
else: else:
try: try:
@ -331,7 +366,9 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
except Exception as e: except Exception as e:
shutil.rmtree(workdir) shutil.rmtree(workdir)
shutil.rmtree(tarfile_dir) shutil.rmtree(tarfile_dir)
shutil.rmtree(tmpdir)
tty.die(e) tty.die(e)
# create compressed tarball of the install prefix # create compressed tarball of the install prefix
with closing(tarfile.open(tarfile_path, 'w:gz')) as tar: with closing(tarfile.open(tarfile_path, 'w:gz')) as tar:
tar.add(name='%s' % workdir, tar.add(name='%s' % workdir,
@ -360,7 +397,9 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
spec_dict['full_hash'] = spec.full_hash() spec_dict['full_hash'] = spec.full_hash()
tty.debug('The full_hash ({0}) of {1} will be written into {2}'.format( tty.debug('The full_hash ({0}) of {1} will be written into {2}'.format(
spec_dict['full_hash'], spec.name, specfile_path)) spec_dict['full_hash'],
spec.name,
url_util.format(remote_specfile_path)))
tty.debug(spec.tree()) tty.debug(spec.tree())
with open(specfile_path, 'w') as outfile: with open(specfile_path, 'w') as outfile:
@ -382,9 +421,19 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
if not unsigned: if not unsigned:
os.remove('%s.asc' % specfile_path) os.remove('%s.asc' % specfile_path)
# create an index.html for the build_cache directory so specs can be found web_util.push_to_url(
if regenerate_index: spackfile_path, remote_spackfile_path, keep_original=False)
generate_package_index(build_cache_dir) web_util.push_to_url(
specfile_path, remote_specfile_path, keep_original=False)
try:
# create an index.html for the build_cache directory so specs can be
# found
if regenerate_index:
generate_package_index(url_util.join(
outdir, os.path.relpath(cache_prefix, tmpdir)))
finally:
shutil.rmtree(tmpdir)
return None return None
@ -394,13 +443,16 @@ def download_tarball(spec):
Download binary tarball for given package into stage area Download binary tarball for given package into stage area
Return True if successful Return True if successful
""" """
mirrors = spack.config.get('mirrors') if not spack.mirror.MirrorCollection():
if len(mirrors) == 0:
tty.die("Please add a spack mirror to allow " + tty.die("Please add a spack mirror to allow " +
"download of pre-compiled packages.") "download of pre-compiled packages.")
tarball = tarball_path_name(spec, '.spack') tarball = tarball_path_name(spec, '.spack')
for mirror_name, mirror_url in mirrors.items():
url = mirror_url + '/' + _build_cache_relative_path + '/' + tarball for mirror in spack.mirror.MirrorCollection().values():
url = url_util.join(
mirror.fetch_url, _build_cache_relative_path, tarball)
# stage the tarball into standard place # stage the tarball into standard place
stage = Stage(url, name="build_cache", keep=True) stage = Stage(url, name="build_cache", keep=True)
try: try:
@ -408,6 +460,7 @@ def download_tarball(spec):
return stage.save_filename return stage.save_filename
except fs.FetchError: except fs.FetchError:
continue continue
return None return None
@ -610,26 +663,29 @@ def get_specs(force=False):
tty.debug("Using previously-retrieved specs") tty.debug("Using previously-retrieved specs")
return _cached_specs return _cached_specs
mirrors = spack.config.get('mirrors') if not spack.mirror.MirrorCollection():
if len(mirrors) == 0: tty.warn("No Spack mirrors are currently configured")
tty.debug("No Spack mirrors are currently configured")
return {} return {}
urls = set() urls = set()
for mirror_name, mirror_url in mirrors.items(): for mirror in spack.mirror.MirrorCollection().values():
if mirror_url.startswith('file'): fetch_url_build_cache = url_util.join(
mirror = mirror_url.replace( mirror.fetch_url, _build_cache_relative_path)
'file://', '') + "/" + _build_cache_relative_path
tty.msg("Finding buildcaches in %s" % mirror) mirror_dir = url_util.local_file_path(fetch_url_build_cache)
if os.path.exists(mirror): if mirror_dir:
files = os.listdir(mirror) tty.msg("Finding buildcaches in %s" % mirror_dir)
if os.path.exists(mirror_dir):
files = os.listdir(mirror_dir)
for file in files: for file in files:
if re.search('spec.yaml', file): if re.search('spec.yaml', file):
link = 'file://' + mirror + '/' + file link = url_util.join(fetch_url_build_cache, file)
urls.add(link) urls.add(link)
else: else:
tty.msg("Finding buildcaches on %s" % mirror_url) tty.msg("Finding buildcaches at %s" %
p, links = spider(mirror_url + "/" + _build_cache_relative_path) url_util.format(fetch_url_build_cache))
p, links = web_util.spider(
url_util.join(fetch_url_build_cache, 'index.html'))
for link in links: for link in links:
if re.search("spec.yaml", link): if re.search("spec.yaml", link):
urls.add(link) urls.add(link)
@ -659,28 +715,33 @@ def get_keys(install=False, trust=False, force=False):
""" """
Get pgp public keys available on mirror Get pgp public keys available on mirror
""" """
mirrors = spack.config.get('mirrors') if not spack.mirror.MirrorCollection():
if len(mirrors) == 0:
tty.die("Please add a spack mirror to allow " + tty.die("Please add a spack mirror to allow " +
"download of build caches.") "download of build caches.")
keys = set() keys = set()
for mirror_name, mirror_url in mirrors.items():
if mirror_url.startswith('file'): for mirror in spack.mirror.MirrorCollection().values():
mirror = os.path.join( fetch_url_build_cache = url_util.join(
mirror_url.replace('file://', ''), _build_cache_relative_path) mirror.fetch_url, _build_cache_relative_path)
tty.msg("Finding public keys in %s" % mirror)
files = os.listdir(mirror) mirror_dir = url_util.local_file_path(fetch_url_build_cache)
if mirror_dir:
tty.msg("Finding public keys in %s" % mirror_dir)
files = os.listdir(mirror_dir)
for file in files: for file in files:
if re.search(r'\.key', file): if re.search(r'\.key', file):
link = 'file://' + mirror + '/' + file link = url_util.join(fetch_url_build_cache, file)
keys.add(link) keys.add(link)
else: else:
tty.msg("Finding public keys on %s" % mirror_url) tty.msg("Finding public keys at %s" %
p, links = spider(mirror_url + "/build_cache", depth=1) url_util.format(fetch_url_build_cache))
p, links = web_util.spider(fetch_url_build_cache, depth=1)
for link in links: for link in links:
if re.search(r'\.key', link): if re.search(r'\.key', link):
keys.add(link) keys.add(link)
for link in keys: for link in keys:
with Stage(link, name="build_cache", keep=True) as stage: with Stage(link, name="build_cache", keep=True) as stage:
if os.path.exists(stage.save_filename) and force: if os.path.exists(stage.save_filename) and force:
@ -717,15 +778,16 @@ def needs_rebuild(spec, mirror_url, rebuild_on_errors=False):
# Try to retrieve the .spec.yaml directly, based on the known # Try to retrieve the .spec.yaml directly, based on the known
# format of the name, in order to determine if the package # format of the name, in order to determine if the package
# needs to be rebuilt. # needs to be rebuilt.
build_cache_dir = build_cache_directory(mirror_url) cache_prefix = build_cache_prefix(mirror_url)
spec_yaml_file_name = tarball_name(spec, '.spec.yaml') spec_yaml_file_name = tarball_name(spec, '.spec.yaml')
file_path = os.path.join(build_cache_dir, spec_yaml_file_name) file_path = os.path.join(cache_prefix, spec_yaml_file_name)
result_of_error = 'Package ({0}) will {1}be rebuilt'.format( result_of_error = 'Package ({0}) will {1}be rebuilt'.format(
spec.short_spec, '' if rebuild_on_errors else 'not ') spec.short_spec, '' if rebuild_on_errors else 'not ')
try: try:
yaml_contents = read_from_url(file_path) _, _, yaml_file = web_util.read_from_url(file_path)
yaml_contents = codecs.getreader('utf-8')(yaml_file).read()
except URLError as url_err: except URLError as url_err:
err_msg = [ err_msg = [
'Unable to determine whether {0} needs rebuilding,', 'Unable to determine whether {0} needs rebuilding,',
@ -782,22 +844,22 @@ def check_specs_against_mirrors(mirrors, specs, output_file=None,
""" """
rebuilds = {} rebuilds = {}
for mirror_name, mirror_url in mirrors.items(): for mirror in spack.mirror.MirrorCollection(mirrors).values():
tty.msg('Checking for built specs at %s' % mirror_url) tty.msg('Checking for built specs at %s' % mirror.fetch_url)
rebuild_list = [] rebuild_list = []
for spec in specs: for spec in specs:
if needs_rebuild(spec, mirror_url, rebuild_on_errors): if needs_rebuild(spec, mirror.fetch_url, rebuild_on_errors):
rebuild_list.append({ rebuild_list.append({
'short_spec': spec.short_spec, 'short_spec': spec.short_spec,
'hash': spec.dag_hash() 'hash': spec.dag_hash()
}) })
if rebuild_list: if rebuild_list:
rebuilds[mirror_url] = { rebuilds[mirror.fetch_url] = {
'mirrorName': mirror_name, 'mirrorName': mirror.name,
'mirrorUrl': mirror_url, 'mirrorUrl': mirror.fetch_url,
'rebuildSpecs': rebuild_list 'rebuildSpecs': rebuild_list
} }
@ -810,33 +872,36 @@ def check_specs_against_mirrors(mirrors, specs, output_file=None,
def _download_buildcache_entry(mirror_root, descriptions): def _download_buildcache_entry(mirror_root, descriptions):
for description in descriptions: for description in descriptions:
url = os.path.join(mirror_root, description['url']) description_url = os.path.join(mirror_root, description['url'])
path = description['path'] path = description['path']
fail_if_missing = description['required'] fail_if_missing = description['required']
mkdirp(path) mkdirp(path)
stage = Stage(url, name="build_cache", path=path, keep=True) stage = Stage(
description_url, name="build_cache", path=path, keep=True)
try: try:
stage.fetch() stage.fetch()
except fs.FetchError as e: except fs.FetchError as e:
tty.debug(e) tty.debug(e)
if fail_if_missing: if fail_if_missing:
tty.error('Failed to download required url {0}'.format(url)) tty.error('Failed to download required url {0}'.format(
description_url))
return False return False
return True return True
def download_buildcache_entry(file_descriptions): def download_buildcache_entry(file_descriptions):
mirrors = spack.config.get('mirrors') if not spack.mirror.MirrorCollection():
if len(mirrors) == 0:
tty.die("Please add a spack mirror to allow " + tty.die("Please add a spack mirror to allow " +
"download of buildcache entries.") "download of buildcache entries.")
for mirror_name, mirror_url in mirrors.items(): for mirror in spack.mirror.MirrorCollection().values():
mirror_root = os.path.join(mirror_url, _build_cache_relative_path) mirror_root = os.path.join(
mirror.fetch_url,
_build_cache_relative_path)
if _download_buildcache_entry(mirror_root, file_descriptions): if _download_buildcache_entry(mirror_root, file_descriptions):
return True return True

View file

@ -9,11 +9,13 @@
import llnl.util.lang import llnl.util.lang
from llnl.util.filesystem import mkdirp from llnl.util.filesystem import mkdirp
import spack.error
import spack.paths import spack.paths
import spack.config import spack.config
import spack.fetch_strategy import spack.fetch_strategy
import spack.util.file_cache import spack.util.file_cache
from spack.util.path import canonicalize_path import spack.util.path
import spack.util.url as url_util
def _misc_cache(): def _misc_cache():
@ -25,7 +27,7 @@ def _misc_cache():
path = spack.config.get('config:misc_cache') path = spack.config.get('config:misc_cache')
if not path: if not path:
path = os.path.join(spack.paths.user_config_path, 'cache') path = os.path.join(spack.paths.user_config_path, 'cache')
path = canonicalize_path(path) path = spack.util.path.canonicalize_path(path)
return spack.util.file_cache.FileCache(path) return spack.util.file_cache.FileCache(path)
@ -43,22 +45,26 @@ def _fetch_cache():
path = spack.config.get('config:source_cache') path = spack.config.get('config:source_cache')
if not path: if not path:
path = os.path.join(spack.paths.var_path, "cache") path = os.path.join(spack.paths.var_path, "cache")
path = canonicalize_path(path) path = spack.util.path.canonicalize_path(path)
return spack.fetch_strategy.FsCache(path) return spack.fetch_strategy.FsCache(path)
class MirrorCache(object): class MirrorCache(object):
def __init__(self, root): def __init__(self, root):
self.root = os.path.abspath(root) self.root = url_util.local_file_path(root)
if not self.root:
raise spack.error.SpackError(
'MirrorCaches only work with file:// URLs')
self.new_resources = set() self.new_resources = set()
self.existing_resources = set() self.existing_resources = set()
def store(self, fetcher, relative_dest): def store(self, fetcher, relative_dest):
# Note this will archive package sources even if they would not # Note this will archive package sources even if they would not
# normally be cached (e.g. the current tip of an hg/git branch) # normally be cached (e.g. the current tip of an hg/git branch)
dst = os.path.join(self.root, relative_dest) dst = os.path.join(self.root, relative_dest)
if os.path.exists(dst): if os.path.exists(dst):
self.existing_resources.add(relative_dest) self.existing_resources.add(relative_dest)
else: else:

View file

@ -14,6 +14,7 @@
import spack.cmd.common.arguments as arguments import spack.cmd.common.arguments as arguments
import spack.environment as ev import spack.environment as ev
import spack.hash_types as ht import spack.hash_types as ht
import spack.mirror
import spack.relocate import spack.relocate
import spack.repo import spack.repo
import spack.spec import spack.spec
@ -21,6 +22,8 @@
import spack.config import spack.config
import spack.repo import spack.repo
import spack.store import spack.store
import spack.util.url as url_util
from spack.error import SpecError from spack.error import SpecError
from spack.spec import Spec, save_dependency_spec_yamls from spack.spec import Spec, save_dependency_spec_yamls
@ -205,6 +208,13 @@ def setup_parser(subparser):
help='Destination mirror url') help='Destination mirror url')
copy.set_defaults(func=buildcache_copy) copy.set_defaults(func=buildcache_copy)
# Update buildcache index without copying any additional packages
update_index = subparsers.add_parser(
'update-index', help=buildcache_update_index.__doc__)
update_index.add_argument(
'-d', '--mirror-url', default=None, help='Destination mirror url')
update_index.set_defaults(func=buildcache_update_index)
def find_matching_specs(pkgs, allow_multiple_matches=False, env=None): def find_matching_specs(pkgs, allow_multiple_matches=False, env=None):
"""Returns a list of specs matching the not necessarily """Returns a list of specs matching the not necessarily
@ -312,9 +322,14 @@ def createtarball(args):
" yaml file containing a spec to install") " yaml file containing a spec to install")
pkgs = set(packages) pkgs = set(packages)
specs = set() specs = set()
outdir = '.' outdir = '.'
if args.directory: if args.directory:
outdir = args.directory outdir = args.directory
mirror = spack.mirror.MirrorCollection().lookup(outdir)
outdir = url_util.format(mirror.push_url)
signkey = None signkey = None
if args.key: if args.key:
signkey = args.key signkey = args.key
@ -649,6 +664,19 @@ def buildcache_copy(args):
shutil.copyfile(cdashid_src_path, cdashid_dest_path) shutil.copyfile(cdashid_src_path, cdashid_dest_path)
def buildcache_update_index(args):
"""Update a buildcache index."""
outdir = '.'
if args.mirror_url:
outdir = args.mirror_url
mirror = spack.mirror.MirrorCollection().lookup(outdir)
outdir = url_util.format(mirror.push_url)
bindist.generate_package_index(
url_util.join(outdir, bindist.build_cache_relative_path()))
def buildcache(parser, args): def buildcache(parser, args):
if args.func: if args.func:
args.func(args) args.func(args)

View file

@ -11,8 +11,8 @@
import spack.cmd import spack.cmd
import spack.repo import spack.repo
import spack.stage
import spack.util.crypto import spack.util.crypto
import spack.util.web
from spack.util.naming import valid_fully_qualified_module_name from spack.util.naming import valid_fully_qualified_module_name
from spack.version import ver, Version from spack.version import ver, Version
@ -56,7 +56,7 @@ def checksum(parser, args):
if not url_dict: if not url_dict:
tty.die("Could not find any versions for {0}".format(pkg.name)) tty.die("Could not find any versions for {0}".format(pkg.name))
version_lines = spack.util.web.get_checksums_for_versions( version_lines = spack.stage.get_checksums_for_versions(
url_dict, pkg.name, keep_stage=args.keep_stage) url_dict, pkg.name, keep_stage=args.keep_stage)
print() print()

View file

@ -13,6 +13,7 @@
import spack.util.web import spack.util.web
import spack.repo import spack.repo
import spack.stage
from spack.spec import Spec from spack.spec import Spec
from spack.util.editor import editor from spack.util.editor import editor
from spack.util.executable import which, ProcessError from spack.util.executable import which, ProcessError
@ -618,7 +619,7 @@ def get_versions(args, name):
version = parse_version(args.url) version = parse_version(args.url)
url_dict = {version: args.url} url_dict = {version: args.url}
versions = spack.util.web.get_checksums_for_versions( versions = spack.stage.get_checksums_for_versions(
url_dict, name, first_stage_function=guesser, url_dict, name, first_stage_function=guesser,
keep_stage=args.keep_stage) keep_stage=args.keep_stage)
else: else:

View file

@ -4,20 +4,21 @@
# SPDX-License-Identifier: (Apache-2.0 OR MIT) # SPDX-License-Identifier: (Apache-2.0 OR MIT)
import sys import sys
import os
from datetime import datetime
import argparse import argparse
import llnl.util.tty as tty import llnl.util.tty as tty
from llnl.util.tty.colify import colify from llnl.util.tty.colify import colify
import spack.cmd import spack.cmd
import spack.cmd.common.arguments as arguments
import spack.concretize import spack.concretize
import spack.config import spack.config
import spack.environment as ev
import spack.mirror import spack.mirror
import spack.repo import spack.repo
import spack.cmd.common.arguments as arguments import spack.util.url as url_util
import spack.environment as ev import spack.util.web as web_util
from spack.spec import Spec from spack.spec import Spec
from spack.error import SpackError from spack.error import SpackError
from spack.util.spack_yaml import syaml_dict from spack.util.spack_yaml import syaml_dict
@ -73,6 +74,19 @@ def setup_parser(subparser):
default=spack.config.default_modify_scope(), default=spack.config.default_modify_scope(),
help="configuration scope to modify") help="configuration scope to modify")
# Set-Url
set_url_parser = sp.add_parser('set-url', help=mirror_set_url.__doc__)
set_url_parser.add_argument('name', help="mnemonic name for mirror")
set_url_parser.add_argument(
'url', help="url of mirror directory from 'spack mirror create'")
set_url_parser.add_argument(
'--push', action='store_true',
help="set only the URL used for uploading new packages")
set_url_parser.add_argument(
'--scope', choices=scopes, metavar=scopes_metavar,
default=spack.config.default_modify_scope(),
help="configuration scope to modify")
# List # List
list_parser = sp.add_parser('list', help=mirror_list.__doc__) list_parser = sp.add_parser('list', help=mirror_list.__doc__)
list_parser.add_argument( list_parser.add_argument(
@ -83,20 +97,14 @@ def setup_parser(subparser):
def mirror_add(args): def mirror_add(args):
"""Add a mirror to Spack.""" """Add a mirror to Spack."""
url = args.url url = url_util.format(args.url)
if url.startswith('/'):
url = 'file://' + url
mirrors = spack.config.get('mirrors', scope=args.scope) mirrors = spack.config.get('mirrors', scope=args.scope)
if not mirrors: if not mirrors:
mirrors = syaml_dict() mirrors = syaml_dict()
for name, u in mirrors.items(): if args.name in mirrors:
if name == args.name: tty.die("Mirror with name %s already exists." % args.name)
tty.die("Mirror with name %s already exists." % name)
if u == url:
tty.die("Mirror with url %s already exists." % url)
# should only be one item per mirror dict.
items = [(n, u) for n, u in mirrors.items()] items = [(n, u) for n, u in mirrors.items()]
items.insert(0, (args.name, url)) items.insert(0, (args.name, url))
@ -117,21 +125,86 @@ def mirror_remove(args):
old_value = mirrors.pop(name) old_value = mirrors.pop(name)
spack.config.set('mirrors', mirrors, scope=args.scope) spack.config.set('mirrors', mirrors, scope=args.scope)
tty.msg("Removed mirror %s with url %s" % (name, old_value))
debug_msg_url = "url %s"
debug_msg = ["Removed mirror %s with"]
values = [name]
try:
fetch_value = old_value['fetch']
push_value = old_value['push']
debug_msg.extend(("fetch", debug_msg_url, "and push", debug_msg_url))
values.extend((fetch_value, push_value))
except TypeError:
debug_msg.append(debug_msg_url)
values.append(old_value)
tty.debug(" ".join(debug_msg) % tuple(values))
tty.msg("Removed mirror %s." % name)
def mirror_set_url(args):
"""Change the URL of a mirror."""
url = url_util.format(args.url)
mirrors = spack.config.get('mirrors', scope=args.scope)
if not mirrors:
mirrors = syaml_dict()
if args.name not in mirrors:
tty.die("No mirror found with name %s." % args.name)
entry = mirrors[args.name]
try:
fetch_url = entry['fetch']
push_url = entry['push']
except TypeError:
fetch_url, push_url = entry, entry
changes_made = False
if args.push:
changes_made = changes_made or push_url != url
push_url = url
else:
changes_made = (
changes_made or fetch_url != push_url or push_url != url)
fetch_url, push_url = url, url
items = [
(
(n, u)
if n != args.name else (
(n, {"fetch": fetch_url, "push": push_url})
if fetch_url != push_url else (n, fetch_url)
)
)
for n, u in mirrors.items()
]
mirrors = syaml_dict(items)
spack.config.set('mirrors', mirrors, scope=args.scope)
if changes_made:
tty.msg(
"Changed%s url for mirror %s." %
((" (push)" if args.push else ""), args.name))
else:
tty.msg("Url already set for mirror %s." % args.name)
def mirror_list(args): def mirror_list(args):
"""Print out available mirrors to the console.""" """Print out available mirrors to the console."""
mirrors = spack.config.get('mirrors', scope=args.scope)
mirrors = spack.mirror.MirrorCollection(scope=args.scope)
if not mirrors: if not mirrors:
tty.msg("No mirrors configured.") tty.msg("No mirrors configured.")
return return
max_len = max(len(n) for n in mirrors.keys()) mirrors.display()
fmt = "%%-%ds%%s" % (max_len + 4)
for name in mirrors:
print(fmt % (name, mirrors[name]))
def _read_specs_from_file(filename): def _read_specs_from_file(filename):
@ -188,14 +261,13 @@ def mirror_create(args):
msg = 'Skipping {0} as it is an external spec.' msg = 'Skipping {0} as it is an external spec.'
tty.msg(msg.format(spec.cshort_spec)) tty.msg(msg.format(spec.cshort_spec))
# Default name for directory is spack-mirror-<DATESTAMP> mirror = spack.mirror.Mirror(
directory = args.directory args.directory or spack.config.get('config:source_cache'))
if not directory:
timestamp = datetime.now().strftime("%Y-%m-%d") directory = url_util.format(mirror.push_url)
directory = 'spack-mirror-' + timestamp
# Make sure nothing is in the way. # Make sure nothing is in the way.
existed = os.path.isdir(directory) existed = web_util.url_exists(directory)
# Actually do the work to create the mirror # Actually do the work to create the mirror
present, mirrored, error = spack.mirror.create( present, mirrored, error = spack.mirror.create(
@ -220,6 +292,7 @@ def mirror(parser, args):
'add': mirror_add, 'add': mirror_add,
'remove': mirror_remove, 'remove': mirror_remove,
'rm': mirror_remove, 'rm': mirror_remove,
'set-url': mirror_set_url,
'list': mirror_list} 'list': mirror_list}
if args.no_checksum: if args.no_checksum:

View file

@ -5,10 +5,8 @@
from __future__ import division, print_function from __future__ import division, print_function
from collections import defaultdict from collections import defaultdict
try:
from urllib.parse import urlparse import six.moves.urllib.parse as urllib_parse
except ImportError:
from urlparse import urlparse
import spack.fetch_strategy as fs import spack.fetch_strategy as fs
import spack.repo import spack.repo
@ -262,7 +260,7 @@ def add(self, fetcher):
self.checksums[algo] += 1 self.checksums[algo] += 1
# parse out the URL scheme (https/http/ftp/etc.) # parse out the URL scheme (https/http/ftp/etc.)
urlinfo = urlparse(fetcher.url) urlinfo = urllib_parse.urlparse(fetcher.url)
self.schemes[urlinfo.scheme] += 1 self.schemes[urlinfo.scheme] += 1
elif url_type == 'git': elif url_type == 'git':

View file

@ -23,6 +23,7 @@
Archive a source directory, e.g. for creating a mirror. Archive a source directory, e.g. for creating a mirror.
""" """
import os import os
import os.path
import sys import sys
import re import re
import shutil import shutil
@ -30,6 +31,7 @@
import xml.etree.ElementTree import xml.etree.ElementTree
from functools import wraps from functools import wraps
from six import string_types, with_metaclass from six import string_types, with_metaclass
import six.moves.urllib.parse as urllib_parse
import llnl.util.tty as tty import llnl.util.tty as tty
from llnl.util.filesystem import ( from llnl.util.filesystem import (
@ -39,6 +41,9 @@
import spack.error import spack.error
import spack.util.crypto as crypto import spack.util.crypto as crypto
import spack.util.pattern as pattern import spack.util.pattern as pattern
import spack.util.web as web_util
import spack.util.url as url_util
from spack.util.executable import which from spack.util.executable import which
from spack.util.string import comma_and, quote from spack.util.string import comma_and, quote
from spack.version import Version, ver from spack.version import Version, ver
@ -48,6 +53,17 @@
#: List of all fetch strategies, created by FetchStrategy metaclass. #: List of all fetch strategies, created by FetchStrategy metaclass.
all_strategies = [] all_strategies = []
CONTENT_TYPE_MISMATCH_WARNING_TEMPLATE = (
"The contents of {subject} look like {content_type}. Either the URL"
" you are trying to use does not exist or you have an internet gateway"
" issue. You can remove the bad archive using 'spack clean"
" <package>', then try again using the correct URL.")
def warn_content_type_mismatch(subject, content_type='HTML'):
tty.warn(CONTENT_TYPE_MISMATCH_WARNING_TEMPLATE.format(
subject=subject, content_type=content_type))
def _needs_stage(fun): def _needs_stage(fun):
"""Many methods on fetch strategies require a stage to be set """Many methods on fetch strategies require a stage to be set
@ -351,12 +367,7 @@ def fetch(self):
content_types = re.findall(r'Content-Type:[^\r\n]+', headers, content_types = re.findall(r'Content-Type:[^\r\n]+', headers,
flags=re.IGNORECASE) flags=re.IGNORECASE)
if content_types and 'text/html' in content_types[-1]: if content_types and 'text/html' in content_types[-1]:
msg = ("The contents of {0} look like HTML. Either the URL " warn_content_type_mismatch(self.archive_file or "the archive")
"you are trying to use does not exist or you have an "
"internet gateway issue. You can remove the bad archive "
"using 'spack clean <package>', then try again using "
"the correct URL.")
tty.warn(msg.format(self.archive_file or "the archive"))
if save_file: if save_file:
os.rename(partial_file, save_file) os.rename(partial_file, save_file)
@ -449,7 +460,10 @@ def archive(self, destination):
if not self.archive_file: if not self.archive_file:
raise NoArchiveFileError("Cannot call archive() before fetching.") raise NoArchiveFileError("Cannot call archive() before fetching.")
shutil.copyfile(self.archive_file, destination) web_util.push_to_url(
self.archive_file,
destination,
keep_original=True)
@_needs_stage @_needs_stage
def check(self): def check(self):
@ -1063,6 +1077,54 @@ def __str__(self):
return "[hg] %s" % self.url return "[hg] %s" % self.url
class S3FetchStrategy(URLFetchStrategy):
"""FetchStrategy that pulls from an S3 bucket."""
enabled = True
url_attr = 's3'
def __init__(self, *args, **kwargs):
try:
super(S3FetchStrategy, self).__init__(*args, **kwargs)
except ValueError:
if not kwargs.get('url'):
raise ValueError(
"S3FetchStrategy requires a url for fetching.")
@_needs_stage
def fetch(self):
if self.archive_file:
tty.msg("Already downloaded %s" % self.archive_file)
return
parsed_url = url_util.parse(self.url)
if parsed_url.scheme != 's3':
raise ValueError(
'S3FetchStrategy can only fetch from s3:// urls.')
tty.msg("Fetching %s" % self.url)
basename = os.path.basename(parsed_url.path)
with working_dir(self.stage.path):
_, headers, stream = web_util.read_from_url(self.url)
with open(basename, 'wb') as f:
shutil.copyfileobj(stream, f)
content_type = headers['Content-type']
if content_type == 'text/html':
warn_content_type_mismatch(self.archive_file or "the archive")
if self.stage.save_filename:
os.rename(
os.path.join(self.stage.path, basename),
self.stage.save_filename)
if not self.archive_file:
raise FailedDownloadError(self.url)
def from_url(url): def from_url(url):
"""Given a URL, find an appropriate fetch strategy for it. """Given a URL, find an appropriate fetch strategy for it.
Currently just gives you a URLFetchStrategy that uses curl. Currently just gives you a URLFetchStrategy that uses curl.
@ -1206,6 +1268,34 @@ def for_package_version(pkg, version):
raise InvalidArgsError(pkg, version, **args) raise InvalidArgsError(pkg, version, **args)
def from_url_scheme(url, *args, **kwargs):
"""Finds a suitable FetchStrategy by matching its url_attr with the scheme
in the given url."""
url = kwargs.get('url', url)
parsed_url = urllib_parse.urlparse(url, scheme='file')
scheme_mapping = (
kwargs.get('scheme_mapping') or
{
'file': 'url',
'http': 'url',
'https': 'url'
})
scheme = parsed_url.scheme
scheme = scheme_mapping.get(scheme, scheme)
for fetcher in all_strategies:
url_attr = getattr(fetcher, 'url_attr', None)
if url_attr and url_attr == scheme:
return fetcher(url, *args, **kwargs)
raise ValueError(
'No FetchStrategy found for url with scheme: "{SCHEME}"'.format(
SCHEME=parsed_url.scheme))
def from_list_url(pkg): def from_list_url(pkg):
"""If a package provides a URL which lists URLs for resources by """If a package provides a URL which lists URLs for resources by
version, this can can create a fetcher for a URL discovered for version, this can can create a fetcher for a URL discovered for

View file

@ -13,6 +13,18 @@
""" """
import sys import sys
import os import os
import os.path
import operator
import six
import ruamel.yaml.error as yaml_error
try:
from collections.abc import Mapping
except ImportError:
from collections import Mapping
import llnl.util.tty as tty import llnl.util.tty as tty
from llnl.util.filesystem import mkdirp from llnl.util.filesystem import mkdirp
@ -20,9 +32,205 @@
import spack.error import spack.error
import spack.url as url import spack.url as url
import spack.fetch_strategy as fs import spack.fetch_strategy as fs
from spack.spec import Spec import spack.util.spack_json as sjson
import spack.util.spack_yaml as syaml
import spack.util.url as url_util
import spack.spec
from spack.version import VersionList from spack.version import VersionList
from spack.util.compression import allowed_archive from spack.util.compression import allowed_archive
from spack.util.spack_yaml import syaml_dict
def _display_mirror_entry(size, name, url, type_=None):
if type_:
type_ = "".join((" (", type_, ")"))
else:
type_ = ""
print("%-*s%s%s" % (size + 4, name, url, type_))
class Mirror(object):
"""Represents a named location for storing source tarballs and binary
packages.
Mirrors have a fetch_url that indicate where and how artifacts are fetched
from them, and a push_url that indicate where and how artifacts are pushed
to them. These two URLs are usually the same.
"""
def __init__(self, fetch_url, push_url=None, name=None):
self._fetch_url = fetch_url
self._push_url = push_url
self._name = name
def to_json(self, stream=None):
return sjson.dump(self.to_dict(), stream)
def to_yaml(self, stream=None):
return syaml.dump(self.to_dict(), stream)
@staticmethod
def from_yaml(stream, name=None):
try:
data = syaml.load(stream)
return Mirror.from_dict(data, name)
except yaml_error.MarkedYAMLError as e:
raise syaml.SpackYAMLError("error parsing YAML spec:", str(e))
@staticmethod
def from_json(stream, name=None):
d = sjson.load(stream)
return Mirror.from_dict(d, name)
def to_dict(self):
if self._push_url is None:
return self._fetch_url
else:
return syaml_dict([
('fetch', self._fetch_url),
('push', self._push_url)])
@staticmethod
def from_dict(d, name=None):
if isinstance(d, six.string_types):
return Mirror(d, name=name)
else:
return Mirror(d['fetch'], d['push'], name)
def display(self, max_len=0):
if self._push_url is None:
_display_mirror_entry(max_len, self._name, self._fetch_url)
else:
_display_mirror_entry(
max_len, self._name, self._fetch_url, "fetch")
_display_mirror_entry(
max_len, self._name, self._push_url, "push")
def __str__(self):
name = self._name
if name is None:
name = ''
else:
name = ' "%s"' % name
if self._push_url is None:
return "[Mirror%s (%s)]" % (name, self._fetch_url)
return "[Mirror%s (fetch: %s, push: %s)]" % (
name, self._fetch_url, self._push_url)
def __repr__(self):
return ''.join((
'Mirror(',
', '.join(
'%s=%s' % (k, repr(v))
for k, v in (
('fetch_url', self._fetch_url),
('push_url', self._push_url),
('name', self._name))
if k == 'fetch_url' or v),
')'
))
@property
def name(self):
return self._name or "<unnamed>"
@property
def fetch_url(self):
return self._fetch_url
@fetch_url.setter
def fetch_url(self, url):
self._fetch_url = url
self._normalize()
@property
def push_url(self):
if self._push_url is None:
return self._fetch_url
return self._push_url
@push_url.setter
def push_url(self, url):
self._push_url = url
self._normalize()
def _normalize(self):
if self._push_url is not None and self._push_url == self._fetch_url:
self._push_url = None
class MirrorCollection(Mapping):
"""A mapping of mirror names to mirrors."""
def __init__(self, mirrors=None, scope=None):
self._mirrors = dict(
(name, Mirror.from_dict(mirror, name))
for name, mirror in (
mirrors.items() if mirrors is not None else
spack.config.get('mirrors', scope=scope).items()))
def to_json(self, stream=None):
return sjson.dump(self.to_dict(True), stream)
def to_yaml(self, stream=None):
return syaml.dump(self.to_dict(True), stream)
@staticmethod
def from_yaml(stream, name=None):
try:
data = syaml.load(stream)
return MirrorCollection(data)
except yaml_error.MarkedYAMLError as e:
raise syaml.SpackYAMLError("error parsing YAML spec:", str(e))
@staticmethod
def from_json(stream, name=None):
d = sjson.load(stream)
return MirrorCollection(d)
def to_dict(self, recursive=False):
return syaml_dict(sorted(
(
(k, (v.to_dict() if recursive else v))
for (k, v) in self._mirrors.items()
), key=operator.itemgetter(0)
))
@staticmethod
def from_dict(d):
return MirrorCollection(d)
def __getitem__(self, item):
return self._mirrors[item]
def display(self):
max_len = max(len(mirror.name) for mirror in self._mirrors.values())
for mirror in self._mirrors.values():
mirror.display(max_len)
def lookup(self, name_or_url):
"""Looks up and returns a Mirror.
If this MirrorCollection contains a named Mirror under the name
[name_or_url], then that mirror is returned. Otherwise, [name_or_url]
is assumed to be a mirror URL, and an anonymous mirror with the given
URL is returned.
"""
result = self.get(name_or_url)
if result is None:
result = Mirror(fetch_url=name_or_url)
return result
def __iter__(self):
return iter(self._mirrors)
def __len__(self):
return len(self._mirrors)
def mirror_archive_filename(spec, fetcher, resource_id=None): def mirror_archive_filename(spec, fetcher, resource_id=None):
@ -114,7 +322,7 @@ def get_matching_versions(specs, **kwargs):
# Generate only versions that satisfy the spec. # Generate only versions that satisfy the spec.
if spec.concrete or v.satisfies(spec.versions): if spec.concrete or v.satisfies(spec.versions):
s = Spec(pkg.name) s = spack.spec.Spec(pkg.name)
s.versions = VersionList([v]) s.versions = VersionList([v])
s.variants = spec.variants.copy() s.variants = spec.variants.copy()
# This is needed to avoid hanging references during the # This is needed to avoid hanging references during the
@ -166,12 +374,17 @@ def create(path, specs, **kwargs):
it creates specs for those versions. If the version satisfies any spec it creates specs for those versions. If the version satisfies any spec
in the specs list, it is downloaded and added to the mirror. in the specs list, it is downloaded and added to the mirror.
""" """
parsed = url_util.parse(path)
mirror_root = url_util.local_file_path(parsed)
# Make sure nothing is in the way. # Make sure nothing is in the way.
if os.path.isfile(path): if mirror_root and os.path.isfile(mirror_root):
raise MirrorError("%s already exists and is a file." % path) raise MirrorError("%s already exists and is a file." % mirror_root)
# automatically spec-ify anything in the specs array. # automatically spec-ify anything in the specs array.
specs = [s if isinstance(s, Spec) else Spec(s) for s in specs] specs = [
s if isinstance(s, spack.spec.Spec) else spack.spec.Spec(s)
for s in specs]
# Get concrete specs for each matching version of these specs. # Get concrete specs for each matching version of these specs.
version_specs = get_matching_versions( version_specs = get_matching_versions(
@ -180,8 +393,7 @@ def create(path, specs, **kwargs):
s.concretize() s.concretize()
# Get the absolute path of the root before we start jumping around. # Get the absolute path of the root before we start jumping around.
mirror_root = os.path.abspath(path) if mirror_root and not os.path.isdir(mirror_root):
if not os.path.isdir(mirror_root):
try: try:
mkdirp(mirror_root) mkdirp(mirror_root)
except OSError as e: except OSError as e:
@ -195,12 +407,12 @@ def create(path, specs, **kwargs):
'error': [] 'error': []
} }
mirror_cache = spack.caches.MirrorCache(mirror_root) mirror_cache = spack.caches.MirrorCache(parsed)
try: try:
spack.caches.mirror_cache = mirror_cache spack.caches.mirror_cache = mirror_cache
# Iterate through packages and download all safe tarballs for each # Iterate through packages and download all safe tarballs for each
for spec in version_specs: for spec in version_specs:
add_single_spec(spec, mirror_root, categories, **kwargs) add_single_spec(spec, parsed, categories, **kwargs)
finally: finally:
spack.caches.mirror_cache = None spack.caches.mirror_cache = None
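A usage sketch of the new `MirrorCollection.lookup()` behavior, matching how the `buildcache` command resolves its `-d` argument elsewhere in this diff (the name/URL below is hypothetical):

```python
import spack.mirror
import spack.util.url as url_util

# A configured mirror name resolves to its Mirror entry; any other string is
# treated as an anonymous mirror whose fetch/push URL is the string itself.
mirror = spack.mirror.MirrorCollection().lookup("my-mirror-or-s3-url")
push_url = url_util.format(mirror.push_url)
```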

View file

@ -0,0 +1,92 @@
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
from io import BufferedReader
import six.moves.urllib.response as urllib_response
import six.moves.urllib.request as urllib_request
import six.moves.urllib.error as urllib_error
import spack.util.s3 as s3_util
import spack.util.url as url_util
import spack.util.web as web_util
# NOTE(opadron): Workaround issue in boto where its StreamingBody
# implementation is missing several APIs expected from IOBase. These missing
# APIs prevent the streams returned by boto from being passed as-are along to
# urllib.
#
# https://github.com/boto/botocore/issues/879
# https://github.com/python/cpython/pull/3249
class WrapStream(BufferedReader):
def __init__(self, raw):
raw.readable = lambda: True
raw.writable = lambda: False
raw.seekable = lambda: False
raw.closed = False
raw.flush = lambda: None
super(WrapStream, self).__init__(raw)
def detach(self):
self.raw = None
def read(self, *args, **kwargs):
return self.raw.read(*args, **kwargs)
def __getattr__(self, key):
return getattr(self.raw, key)
def _s3_open(url):
parsed = url_util.parse(url)
s3 = s3_util.create_s3_session(parsed)
bucket = parsed.netloc
key = parsed.path
if key.startswith('/'):
key = key[1:]
obj = s3.get_object(Bucket=bucket, Key=key)
# NOTE(opadron): Apply workaround here (see above)
stream = WrapStream(obj['Body'])
headers = web_util.standardize_header_names(
obj['ResponseMetadata']['HTTPHeaders'])
return url, headers, stream
class UrllibS3Handler(urllib_request.HTTPSHandler):
def s3_open(self, req):
orig_url = req.get_full_url()
from botocore.exceptions import ClientError
try:
url, headers, stream = _s3_open(orig_url)
return urllib_response.addinfourl(stream, headers, url)
except ClientError as err:
# if no such [KEY], but [KEY]/index.html exists,
# return that, instead.
if err.response['Error']['Code'] == 'NoSuchKey':
try:
_, headers, stream = _s3_open(
url_util.join(orig_url, 'index.html'))
return urllib_response.addinfourl(
stream, headers, orig_url)
except ClientError as err2:
if err.response['Error']['Code'] == 'NoSuchKey':
# raise original error
raise urllib_error.URLError(err)
raise urllib_error.URLError(err2)
raise urllib_error.URLError(err)
S3OpenerDirector = urllib_request.build_opener(UrllibS3Handler())
open = S3OpenerDirector.open
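For context on how this handler gets invoked: `build_opener()` registers any handler method named `<scheme>_open`, so defining `s3_open` above is what routes `s3://` URLs through this code. A minimal, self-contained illustration using the standard library (not Spack code; the bucket/key are hypothetical):

```python
import urllib.request as urllib_request

class PrintingS3Handler(urllib_request.BaseHandler):
    def s3_open(self, req):
        # build_opener() notices the "s3_open" method name and registers this
        # handler for the "s3" scheme; urllib routes s3:// requests here.
        print("would fetch", req.get_full_url())

opener = urllib_request.build_opener(PrintingS3Handler())
# opener.open("s3://my-bucket/some/key")  # would call PrintingS3Handler.s3_open
```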

View file

@ -17,7 +17,19 @@
'default': {}, 'default': {},
'additionalProperties': False, 'additionalProperties': False,
'patternProperties': { 'patternProperties': {
r'\w[\w-]*': {'type': 'string'}, r'\w[\w-]*': {
'anyOf': [
{'type': 'string'},
{
'type': 'object',
'required': ['fetch', 'push'],
'properties': {
'fetch': {'type': 'string'},
'push': {'type': 'string'}
}
}
]
},
}, },
}, },
} }

View file

@ -12,7 +12,6 @@
import getpass import getpass
from six import string_types from six import string_types
from six import iteritems from six import iteritems
from six.moves.urllib.parse import urljoin
import llnl.util.tty as tty import llnl.util.tty as tty
from llnl.util.filesystem import mkdirp, can_access, install, install_tree from llnl.util.filesystem import mkdirp, can_access, install, install_tree
@ -20,12 +19,16 @@
import spack.paths import spack.paths
import spack.caches import spack.caches
import spack.cmd
import spack.config import spack.config
import spack.error import spack.error
import spack.mirror
import spack.util.lock import spack.util.lock
import spack.fetch_strategy as fs import spack.fetch_strategy as fs
import spack.util.pattern as pattern import spack.util.pattern as pattern
import spack.util.path as sup import spack.util.path as sup
import spack.util.url as url_util
from spack.util.crypto import prefix_bits, bit_length from spack.util.crypto import prefix_bits, bit_length
@ -252,7 +255,7 @@ def __init__(
# TODO: fetch/stage coupling needs to be reworked -- the logic # TODO: fetch/stage coupling needs to be reworked -- the logic
# TODO: here is convoluted and not modular enough. # TODO: here is convoluted and not modular enough.
if isinstance(url_or_fetch_strategy, string_types): if isinstance(url_or_fetch_strategy, string_types):
self.fetcher = fs.from_url(url_or_fetch_strategy) self.fetcher = fs.from_url_scheme(url_or_fetch_strategy)
elif isinstance(url_or_fetch_strategy, fs.FetchStrategy): elif isinstance(url_or_fetch_strategy, fs.FetchStrategy):
self.fetcher = url_or_fetch_strategy self.fetcher = url_or_fetch_strategy
else: else:
@ -397,16 +400,9 @@ def fetch(self, mirror_only=False):
# TODO: CompositeFetchStrategy here. # TODO: CompositeFetchStrategy here.
self.skip_checksum_for_mirror = True self.skip_checksum_for_mirror = True
if self.mirror_path: if self.mirror_path:
mirrors = spack.config.get('mirrors') urls = [
url_util.join(mirror.fetch_url, self.mirror_path)
# Join URLs of mirror roots with mirror paths. Because for mirror in spack.mirror.MirrorCollection().values()]
# urljoin() will strip everything past the final '/' in
# the root, so we add a '/' if it is not present.
mir_roots = [
sup.substitute_path_variables(root) if root.endswith(os.sep)
else sup.substitute_path_variables(root) + os.sep
for root in mirrors.values()]
urls = [urljoin(root, self.mirror_path) for root in mir_roots]
# If this archive is normally fetched from a tarball URL, # If this archive is normally fetched from a tarball URL,
# then use the same digest. `spack mirror` ensures that # then use the same digest. `spack mirror` ensures that
@ -425,9 +421,12 @@ def fetch(self, mirror_only=False):
# Add URL strategies for all the mirrors with the digest # Add URL strategies for all the mirrors with the digest
for url in urls: for url in urls:
fetchers.insert( fetchers.append(fs.from_url_scheme(
0, fs.URLFetchStrategy( url, digest, expand=expand, extension=extension))
url, digest, expand=expand, extension=extension)) # fetchers.insert(
# 0, fs.URLFetchStrategy(
# url, digest, expand=expand, extension=extension))
if self.default_fetcher.cachable: if self.default_fetcher.cachable:
fetchers.insert( fetchers.insert(
0, spack.caches.fetch_cache.fetcher( 0, spack.caches.fetch_cache.fetcher(
@ -708,6 +707,91 @@ def purge():
remove_linked_tree(stage_path) remove_linked_tree(stage_path)
def get_checksums_for_versions(
url_dict, name, first_stage_function=None, keep_stage=False):
"""Fetches and checksums archives from URLs.
This function is called by both ``spack checksum`` and ``spack
create``. The ``first_stage_function`` argument allows the caller to
inspect the first downloaded archive, e.g., to determine the build
system.
Args:
url_dict (dict): A dictionary of the form: version -> URL
name (str): The name of the package
first_stage_function (callable): function that takes a Stage and a URL;
this is run on the stage of the first URL downloaded
keep_stage (bool): whether to keep staging area when command completes
Returns:
(str): A multi-line string containing versions and corresponding hashes
"""
sorted_versions = sorted(url_dict.keys(), reverse=True)
# Find length of longest string in the list for padding
max_len = max(len(str(v)) for v in sorted_versions)
num_ver = len(sorted_versions)
tty.msg("Found {0} version{1} of {2}:".format(
num_ver, '' if num_ver == 1 else 's', name),
"",
*spack.cmd.elide_list(
["{0:{1}} {2}".format(str(v), max_len, url_dict[v])
for v in sorted_versions]))
tty.msg('')
archives_to_fetch = tty.get_number(
"How many would you like to checksum?", default=1, abort='q')
if not archives_to_fetch:
tty.die("Aborted.")
versions = sorted_versions[:archives_to_fetch]
urls = [url_dict[v] for v in versions]
tty.msg("Downloading...")
version_hashes = []
i = 0
for url, version in zip(urls, versions):
try:
with Stage(url, keep=keep_stage) as stage:
# Fetch the archive
stage.fetch()
if i == 0 and first_stage_function:
# Only run first_stage_function the first time,
# no need to run it every time
first_stage_function(stage, url)
# Checksum the archive and add it to the list
version_hashes.append((version, spack.util.crypto.checksum(
hashlib.sha256, stage.archive_file)))
i += 1
except FailedDownloadError:
tty.msg("Failed to fetch {0}".format(url))
except Exception as e:
tty.msg("Something failed on {0}, skipping.".format(url),
" ({0})".format(e))
if not version_hashes:
tty.die("Could not fetch any versions for {0}".format(name))
# Find length of longest string in the list for padding
max_len = max(len(str(v)) for v, h in version_hashes)
# Generate the version directives to put in a package.py
version_lines = "\n".join([
" version('{0}', {1}sha256='{2}')".format(
v, ' ' * (max_len - len(str(v))), h) for v, h in version_hashes
])
num_hash = len(version_hashes)
tty.msg("Checksummed {0} version{1} of {2}".format(
num_hash, '' if num_hash == 1 else 's', name))
return version_lines
class StageError(spack.error.SpackError): class StageError(spack.error.SpackError):
""""Superclass for all errors encountered during staging.""" """"Superclass for all errors encountered during staging."""
@ -720,5 +804,9 @@ class RestageError(StageError):
""""Error encountered during restaging.""" """"Error encountered during restaging."""
class VersionFetchError(StageError):
"""Raised when we can't determine a URL to fetch a package."""
# Keep this in namespace for convenience # Keep this in namespace for convenience
FailedDownloadError = fs.FailedDownloadError FailedDownloadError = fs.FailedDownloadError

View file

@ -53,6 +53,8 @@ def mock_pkg_git_repo(tmpdir_factory):
# initial commit with mock packages # initial commit with mock packages
git('add', '.') git('add', '.')
git('config', 'user.email', 'testing@spack.io')
git('config', 'user.name', 'Spack Testing')
git('commit', '-m', 'initial mock repo commit') git('commit', '-m', 'initial mock repo commit')
# add commit with pkg-a, pkg-b, pkg-c packages # add commit with pkg-a, pkg-b, pkg-c packages
@ -595,6 +595,7 @@ def test_bad_config_section(mock_config):
spack.config.get('foobar') spack.config.get('foobar')
@pytest.mark.skipif(os.getuid() == 0, reason='user is root')
def test_bad_command_line_scopes(tmpdir, mock_config): def test_bad_command_line_scopes(tmpdir, mock_config):
cfg = spack.config.Configuration() cfg = spack.config.Configuration()
@ -546,6 +546,7 @@ def test_write_lock_timeout_with_multiple_readers_3_2_ranges(lock_path):
timeout_write(lock_path, 5, 1)) timeout_write(lock_path, 5, 1))
@pytest.mark.skipif(os.getuid() == 0, reason='user is root')
def test_read_lock_on_read_only_lockfile(lock_dir, lock_path): def test_read_lock_on_read_only_lockfile(lock_dir, lock_path):
"""read-only directory, read-only lockfile.""" """read-only directory, read-only lockfile."""
touch(lock_path) touch(lock_path)
@ -573,6 +574,7 @@ def test_read_lock_read_only_dir_writable_lockfile(lock_dir, lock_path):
pass pass
@pytest.mark.skipif(os.getuid() == 0, reason='user is root')
def test_read_lock_no_lockfile(lock_dir, lock_path): def test_read_lock_no_lockfile(lock_dir, lock_path):
"""read-only directory, no lockfile (so can't create).""" """read-only directory, no lockfile (so can't create)."""
with read_only(lock_dir): with read_only(lock_dir):
@ -653,6 +653,7 @@ def test_source_path_available(self, mock_stage_archive):
assert source_path.endswith(spack.stage._source_path_subdir) assert source_path.endswith(spack.stage._source_path_subdir)
assert not os.path.exists(source_path) assert not os.path.exists(source_path)
@pytest.mark.skipif(os.getuid() == 0, reason='user is root')
def test_first_accessible_path(self, tmpdir): def test_first_accessible_path(self, tmpdir):
"""Test _first_accessible_path names.""" """Test _first_accessible_path names."""
spack_dir = tmpdir.join('paths') spack_dir = tmpdir.join('paths')
@ -783,6 +784,7 @@ def test_resolve_paths(self):
assert spack.stage._resolve_paths(paths) == res_paths assert spack.stage._resolve_paths(paths) == res_paths
@pytest.mark.skipif(os.getuid() == 0, reason='user is root')
def test_get_stage_root_bad_path(self, clear_stage_root): def test_get_stage_root_bad_path(self, clear_stage_root):
"""Ensure an invalid stage path root raises a StageError.""" """Ensure an invalid stage path root raises a StageError."""
with spack.config.override('config:build_stage', '/no/such/path'): with spack.config.override('config:build_stage', '/no/such/path'):
@ -0,0 +1,44 @@
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import os
import six.moves.urllib.parse as urllib_parse
import spack.config
import spack.util.url as url_util
def create_s3_session(url):
url = url_util.parse(url)
if url.scheme != 's3':
raise ValueError(
'Can not create S3 session from URL with scheme: {SCHEME}'.format(
SCHEME=url.scheme))
# NOTE(opadron): import boto and friends as late as possible. We don't
# want to require boto as a dependency unless the user actually wants to
# access S3 mirrors.
from boto3 import Session
session = Session()
s3_client_args = {"use_ssl": spack.config.get('config:verify_ssl')}
endpoint_url = os.environ.get('S3_ENDPOINT_URL')
if endpoint_url:
if urllib_parse.urlparse(endpoint_url, scheme=None).scheme is None:
endpoint_url = '://'.join(('https', endpoint_url))
s3_client_args['endpoint_url'] = endpoint_url
# if no access credentials provided above, then access anonymously
if not session.get_credentials():
from botocore import UNSIGNED
from botocore.client import Config
s3_client_args["config"] = Config(signature_version=UNSIGNED)
return session.client('s3', **s3_client_args)
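
A rough usage sketch (bucket and prefix names are made up): create_s3_session() returns a low-level boto3 client, so callers can use any client API directly, for example to list a page of keys under a mirror prefix.

import spack.util.s3 as s3_util
import spack.util.url as url_util

mirror = url_util.parse('s3://my-spack-mirror/build_cache')
client = s3_util.create_s3_session(mirror)

resp = client.list_objects_v2(
    Bucket=mirror.netloc,             # 'my-spack-mirror'
    Prefix=mirror.path.lstrip('/'),   # 'build_cache'
    MaxKeys=10)
for entry in resp.get('Contents', []):
    print(entry['Key'])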
lib/spack/spack/util/url.py (new file, 175 lines)
@ -0,0 +1,175 @@
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""
Utility functions for parsing, formatting, and manipulating URLs.
"""
import itertools
import os.path
from six import string_types
import six.moves.urllib.parse as urllib_parse
import spack.util.path
def _split_all(path):
"""Split path into its atomic components.
Returns the shortest list, L, of strings such that os.path.join(*L) == path
and os.path.split(element) == ('', element) for every element in L except
possibly the first. This first element may possibly have the value of '/',
or some other OS-dependent path root.
"""
result = []
a = path
old_a = None
while a != old_a:
(old_a, (a, b)) = a, os.path.split(a)
if a or b:
result.insert(0, b or '/')
return result
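
For example, the behavior described above works out to the following (assuming a POSIX-style os.path):

_split_all('/a/b/c')   # -> ['/', 'a', 'b', 'c']
_split_all('a/b/c')    # -> ['a', 'b', 'c']
_split_all('')         # -> []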
def local_file_path(url):
"""Get a local file path from a url.
If url is a file:// URL, return the absolute path to the local
file or directory referenced by it. Otherwise, return None.
"""
if isinstance(url, string_types):
url = parse(url)
if url.scheme == 'file':
return url.path
return None
def parse(url, scheme='file'):
"""Parse a mirror url.
For file:// URLs, the netloc and path components are concatenated and
passed through spack.util.path.canonicalize_path().
Otherwise, the returned value is the same as urllib's urlparse() with
allow_fragments=False.
"""
url_obj = (
urllib_parse.urlparse(url, scheme=scheme, allow_fragments=False)
if isinstance(url, string_types) else url)
(scheme, netloc, path, params, query, _) = url_obj
scheme = (scheme or 'file').lower()
if scheme == 'file':
path = spack.util.path.canonicalize_path(netloc + path)
while path.startswith('//'):
path = path[1:]
netloc = ''
return urllib_parse.ParseResult(scheme=scheme,
netloc=netloc,
path=path,
params=params,
query=query,
fragment=None)
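
Illustrative results, as I read the implementation above (shown for a POSIX system; the bucket and paths are made up):

parse('s3://my-bucket/build_cache')
# -> ParseResult(scheme='s3', netloc='my-bucket', path='/build_cache',
#                params='', query='', fragment=None)

parse('/tmp/mirror')   # no scheme, so it is treated as a file:// URL
# -> ParseResult(scheme='file', netloc='', path='/tmp/mirror',
#                params='', query='', fragment=None)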
def format(parsed_url):
"""Format a URL string
Returns a canonicalized format of the given URL as a string.
"""
if isinstance(parsed_url, string_types):
parsed_url = parse(parsed_url)
return parsed_url.geturl()
def join(base_url, path, *extra, **kwargs):
"""Joins a base URL with one or more local URL path components
If resolve_href is True, treat the base URL as though it were the locator
of a web page, and the remaining URL path components as though they formed
a relative URL to be resolved against it (i.e.: as in os.path.join(...)).
The result is an absolute URL to the resource to which a user's browser
would navigate if they clicked on a link with an "href" attribute equal to
the relative URL.
If resolve_href is False (default), then the URL path components are joined
as in os.path.join().
Examples:
base_url = 's3://bucket/index.html'
body = fetch_body(base_url)
link = get_href(body)  # link == '../other-bucket/document.txt'
# wrong - link is a local URL that needs to be resolved against base_url
spack.util.url.join(base_url, link)
's3://bucket/other-bucket/document.txt'
# correct - resolve local URL against base_url
spack.util.url.join(base_url, link, resolve_href=True)
's3://other-bucket/document.txt'
prefix = 'https://mirror.spack.io/build_cache'
# wrong - prefix is just a URL prefix
spack.util.url.join(prefix, 'my-package', resolve_href=True)
'https://mirror.spack.io/my-package'
# correct - simply append additional URL path components
spack.util.url.join(prefix, 'my-package', resolve_href=False) # default
'https://mirror.spack.io/build_cache/my-package'
"""
base_url = parse(base_url)
resolve_href = kwargs.get('resolve_href', False)
(scheme, netloc, base_path, params, query, _) = base_url
scheme = scheme.lower()
path_tokens = [
part for part in itertools.chain(
_split_all(path),
itertools.chain.from_iterable(
_split_all(extra_path) for extra_path in extra))
if part and part != '/']
base_path_args = ['/fake-root']
if scheme == 's3':
if netloc:
base_path_args.append(netloc)
if base_path.startswith('/'):
base_path = base_path[1:]
base_path_args.append(base_path)
if resolve_href:
new_base_path, _ = os.path.split(os.path.join(*base_path_args))
base_path_args = [new_base_path]
base_path_args.extend(path_tokens)
base_path = os.path.relpath(os.path.join(*base_path_args), '/fake-root')
if scheme == 's3':
path_tokens = [
part for part in _split_all(base_path)
if part and part != '/']
if path_tokens:
netloc = path_tokens.pop(0)
base_path = os.path.join('', *path_tokens)
return format(urllib_parse.ParseResult(scheme=scheme,
netloc=netloc,
path=base_path,
params=params,
query=query,
fragment=None))
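
As a usage sketch beyond the docstring examples, this is how a mirror URL and the build-cache layout might be combined (the mirror name is made up; the result was traced by hand from the code above, so treat it as indicative):

mirror_url = 's3://my-spack-mirror'
join(mirror_url, 'build_cache', 'index.html')
# -> 's3://my-spack-mirror/build_cache/index.html'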
@ -5,16 +5,21 @@
from __future__ import print_function from __future__ import print_function
import codecs
import errno
import re import re
import os import os
import os.path
import shutil
import ssl import ssl
import sys import sys
import traceback import traceback
import hashlib
from itertools import product
import six
from six.moves.urllib.request import urlopen, Request from six.moves.urllib.request import urlopen, Request
from six.moves.urllib.error import URLError from six.moves.urllib.error import URLError
from six.moves.urllib.parse import urljoin
import multiprocessing.pool import multiprocessing.pool
try: try:
@ -28,20 +33,47 @@
class HTMLParseError(Exception): class HTMLParseError(Exception):
pass pass
from llnl.util.filesystem import mkdirp
import llnl.util.tty as tty import llnl.util.tty as tty
import spack.config
import spack.cmd import spack.cmd
import spack.url import spack.config
import spack.stage
import spack.error import spack.error
import spack.url
import spack.util.crypto import spack.util.crypto
import spack.util.s3 as s3_util
import spack.util.url as url_util
from spack.util.compression import ALLOWED_ARCHIVE_TYPES from spack.util.compression import ALLOWED_ARCHIVE_TYPES
# Timeout in seconds for web requests # Timeout in seconds for web requests
_timeout = 10 _timeout = 10
# See docstring for standardize_header_names()
_separators = ('', ' ', '_', '-')
HTTP_HEADER_NAME_ALIASES = {
"Accept-ranges": set(
''.join((A, 'ccept', sep, R, 'anges'))
for A, sep, R in product('Aa', _separators, 'Rr')),
"Content-length": set(
''.join((C, 'ontent', sep, L, 'ength'))
for C, sep, L in product('Cc', _separators, 'Ll')),
"Content-type": set(
''.join((C, 'ontent', sep, T, 'ype'))
for C, sep, T in product('Cc', _separators, 'Tt')),
"Date": set(('Date', 'date')),
"Last-modified": set(
''.join((L, 'ast', sep, M, 'odified'))
for L, sep, M in product('Ll', _separators, 'Mm')),
"Server": set(('Server', 'server'))
}
class LinkParser(HTMLParser): class LinkParser(HTMLParser):
"""This parser just takes an HTML page and strips out the hrefs on the """This parser just takes an HTML page and strips out the hrefs on the
@ -59,7 +91,7 @@ def handle_starttag(self, tag, attrs):
class NonDaemonProcess(multiprocessing.Process): class NonDaemonProcess(multiprocessing.Process):
"""Process tha allows sub-processes, so pools can have sub-pools.""" """Process that allows sub-processes, so pools can have sub-pools."""
@property @property
def daemon(self): def daemon(self):
return False return False
@ -86,25 +118,53 @@ def __init__(self, *args, **kwargs):
super(NonDaemonPool, self).__init__(*args, **kwargs) super(NonDaemonPool, self).__init__(*args, **kwargs)
(The old module-private _read_from_url() and its thin read_from_url() wrapper are replaced by a single read_from_url(); the inline Python-version check moves into the module-level __UNABLE_TO_VERIFY_SSL flag, and the SSL warning into warn_no_ssl_cert_checking(), added further below.)

def uses_ssl(parsed_url):
    if parsed_url.scheme == 'https':
        return True

    if parsed_url.scheme == 's3':
        endpoint_url = os.environ.get('S3_ENDPOINT_URL')
        if not endpoint_url:
            return True

        if url_util.parse(endpoint_url, scheme='https').scheme == 'https':
            return True

    return False


__UNABLE_TO_VERIFY_SSL = (
    lambda pyver: (
        (pyver < (2, 7, 9)) or
        ((3,) < pyver < (3, 4, 3))
    ))(sys.version_info)


def read_from_url(url, accept_content_type=None):
    url = url_util.parse(url)
    context = None

    verify_ssl = spack.config.get('config:verify_ssl')

    # Don't even bother with a context unless the URL scheme is one that uses
    # SSL certs.
    if uses_ssl(url):
        if verify_ssl:
            if __UNABLE_TO_VERIFY_SSL:
                # User wants SSL verification, but it cannot be provided.
                warn_no_ssl_cert_checking()
            else:
                # User wants SSL verification, and it *can* be provided.
                context = ssl.create_default_context()
        else:
            # User has explicitly indicated that they do not want SSL
            # verification.
            context = ssl._create_unverified_context()

    req = Request(url_util.format(url))
    content_type = None
    is_web_url = url.scheme in ('http', 'https')

    if accept_content_type and is_web_url:
        # Make a HEAD request first to check the content type.  This lets
        # us ignore tarballs and gigantic files.
        # It would be nice to do this with the HTTP Accept header to avoid
@ -113,29 +173,179 @@ def _read_from_url(url, accept_content_type=None):
        req.get_method = lambda: "HEAD"
        resp = _urlopen(req, timeout=_timeout, context=context)

        content_type = resp.headers.get('Content-type')

    # Do the real GET request when we know it's just HTML.
    req.get_method = lambda: "GET"
    response = _urlopen(req, timeout=_timeout, context=context)

    if accept_content_type and not is_web_url:
        content_type = response.headers.get('Content-type')

    reject_content_type = (
        accept_content_type and (
            content_type is None or
            not content_type.startswith(accept_content_type)))

    if reject_content_type:
        tty.debug("ignoring page {0}{1}{2}".format(
            url_util.format(url),
            " with content type " if content_type is not None else "",
            content_type or ""))

        return None, None, None

    return response.geturl(), response.headers, response


def warn_no_ssl_cert_checking():
    tty.warn("Spack will not check SSL certificates. You need to update "
             "your Python to enable certificate verification.")
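
A hedged sketch of the new calling convention: read_from_url() now returns the final URL, the response headers, and a file-like response object, so callers decode the body themselves (the mirror URL below is made up):

import codecs
import spack.util.web as web_util

url, headers, response = web_util.read_from_url(
    's3://my-spack-mirror/build_cache/index.html', 'text/html')
if response is not None:
    body = codecs.getreader('utf-8')(response).read()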
def push_to_url(local_path, remote_path, **kwargs):
keep_original = kwargs.get('keep_original', True)
local_url = url_util.parse(local_path)
local_file_path = url_util.local_file_path(local_url)
if local_file_path is None:
raise ValueError('local path must be a file:// url')
remote_url = url_util.parse(remote_path)
verify_ssl = spack.config.get('config:verify_ssl')
if __UNABLE_TO_VERIFY_SSL and verify_ssl and uses_ssl(remote_url):
warn_no_ssl_cert_checking()
remote_file_path = url_util.local_file_path(remote_url)
if remote_file_path is not None:
mkdirp(os.path.dirname(remote_file_path))
if keep_original:
shutil.copy(local_file_path, remote_file_path)
else:
try:
os.rename(local_file_path, remote_file_path)
except OSError as e:
if e.errno == errno.EXDEV:
# NOTE(opadron): The above move failed because it crosses
# filesystem boundaries. Copy the file (plus original
# metadata), and then delete the original. This operation
# needs to be done in separate steps.
shutil.copy2(local_file_path, remote_file_path)
os.remove(local_file_path)
elif remote_url.scheme == 's3':
extra_args = kwargs.get('extra_args', {})
remote_path = remote_url.path
while remote_path.startswith('/'):
remote_path = remote_path[1:]
s3 = s3_util.create_s3_session(remote_url)
s3.upload_file(local_file_path, remote_url.netloc,
remote_path, ExtraArgs=extra_args)
if not keep_original:
os.remove(local_file_path)
else:
raise NotImplementedError(
'Unrecognized URL scheme: {SCHEME}'.format(
SCHEME=remote_url.scheme))
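
A usage sketch for push_to_url(); the paths, bucket, and ACL value are illustrative, and extra_args is forwarded to boto3's upload_file:

import spack.util.web as web_util

# Copy a locally staged tarball to an S3 mirror...
web_util.push_to_url(
    '/tmp/stage/mypkg-1.2.1.tar.gz',
    's3://my-spack-mirror/build_cache/mypkg-1.2.1.tar.gz',
    keep_original=True,
    extra_args={'ACL': 'public-read'})

# ...or move it into a local file:// mirror.
web_util.push_to_url(
    '/tmp/stage/mypkg-1.2.1.tar.gz',
    'file:///srv/mirror/build_cache/mypkg-1.2.1.tar.gz',
    keep_original=False)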
def url_exists(url):
url = url_util.parse(url)
local_path = url_util.local_file_path(url)
if local_path:
return os.path.exists(local_path)
if url.scheme == 's3':
s3 = s3_util.create_s3_session(url)
from botocore.exceptions import ClientError
try:
s3.get_object(Bucket=url.netloc, Key=url.path)
return True
except ClientError as err:
if err.response['Error']['Code'] == 'NoSuchKey':
return False
raise err
# otherwise, just try to "read" from the URL, and assume that *any*
# non-throwing response contains the resource represented by the URL
try:
read_from_url(url)
return True
except URLError:
return False
def remove_url(url):
url = url_util.parse(url)
local_path = url_util.local_file_path(url)
if local_path:
os.remove(local_path)
return
if url.scheme == 's3':
s3 = s3_util.create_s3_session(url)
s3.delete_object(Bucket=url.netloc, Key=url.path)  # bucket comes from the URL's netloc, as elsewhere in this module
return
# Don't even try for other URL schemes.
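
For example (the object key is made up), a caller might probe for and then delete a build-cache entry:

import spack.util.web as web_util

spec_url = 's3://my-spack-mirror/build_cache/mypkg-1.2.1.spec.yaml'
if web_util.url_exists(spec_url):
    web_util.remove_url(spec_url)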
def _list_s3_objects(client, url, num_entries, start_after=None):
list_args = dict(
Bucket=url.netloc,
Prefix=url.path,
MaxKeys=num_entries)
if start_after is not None:
list_args['StartAfter'] = start_after
result = client.list_objects_v2(**list_args)
last_key = None
if result['IsTruncated']:
last_key = result['Contents'][-1]['Key']
iter = (key for key in
(
os.path.relpath(entry['Key'], url.path)
for entry in result['Contents']
)
if key != '.')
return iter, last_key
def _iter_s3_prefix(client, url, num_entries=1024):
key = None
while True:
contents, key = _list_s3_objects(
client, url, num_entries, start_after=key)
for x in contents:
yield x
if not key:
break
def list_url(url):
url = url_util.parse(url)
local_path = url_util.local_file_path(url)
if local_path:
return os.listdir(local_path)
if url.scheme == 's3':
s3 = s3_util.create_s3_session(url)
return list(set(
key.split('/', 1)[0]
for key in _iter_s3_prefix(s3, url)))
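
A sketch of listing a mirror's build cache; for S3 URLs this returns the distinct first path components found under the prefix (the mirror name is made up):

import spack.util.web as web_util

for name in web_util.list_url('s3://my-spack-mirror/build_cache'):
    print(name)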
def _spider(url, visited, root, depth, max_depth, raise_on_error): def _spider(url, visited, root, depth, max_depth, raise_on_error):
@ -154,16 +364,12 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error):
pages = {} # dict from page URL -> text content. pages = {} # dict from page URL -> text content.
links = set() # set of all links seen on visited pages. links = set() # set of all links seen on visited pages.
# root may end with index.html -- chop that off.
if root.endswith('/index.html'):
root = re.sub('/index.html$', '', root)
try: try:
response_url, page = _read_from_url(url, 'text/html') response_url, _, response = read_from_url(url, 'text/html')
if not response_url or not response:
if not response_url or not page:
return pages, links return pages, links
page = codecs.getreader('utf-8')(response).read()
pages[response_url] = page pages[response_url] = page
# Parse out the links in the page # Parse out the links in the page
@ -173,8 +379,10 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error):
while link_parser.links: while link_parser.links:
raw_link = link_parser.links.pop() raw_link = link_parser.links.pop()
abs_link = urljoin(response_url, raw_link.strip()) abs_link = url_util.join(
response_url,
raw_link.strip(),
resolve_href=True)
links.add(abs_link) links.add(abs_link)
# Skip stuff that looks like an archive # Skip stuff that looks like an archive
@ -243,16 +451,28 @@ def _spider_wrapper(args):
return _spider(*args) return _spider(*args)
(Previously _urlopen(*args, **kwargs) always delegated straight to urlopen.)

def _urlopen(req, *args, **kwargs):
    """Wrapper for compatibility with old versions of Python."""
    url = req
    try:
        url = url.get_full_url()
    except AttributeError:
        pass

    # We don't pass 'context' parameter because it was only introduced starting
    # with versions 2.7.9 and 3.4.3 of Python.
    if 'context' in kwargs:
        del kwargs['context']

    opener = urlopen
    if url_util.parse(url).scheme == 's3':
        import spack.s3_handler
        opener = spack.s3_handler.open

    return opener(req, *args, **kwargs)
def spider(root_url, depth=0): def spider(root, depth=0):
"""Gets web pages from a root URL. """Gets web pages from a root URL.
If depth is specified (e.g., depth=2), then this will also follow If depth is specified (e.g., depth=2), then this will also follow
@ -262,7 +482,9 @@ def spider(root_url, depth=0):
performance over a sequential fetch. performance over a sequential fetch.
""" """
pages, links = _spider(root_url, set(), root_url, 0, depth, False)
root = url_util.parse(root)
pages, links = _spider(root, set(), root, 0, depth, False)
return pages, links return pages, links
@ -356,99 +578,112 @@ def find_versions_of_archive(archive_urls, list_url=None, list_depth=0):
return versions return versions
(The get_checksums_for_versions() function is removed from this module; it now lives with the staging code shown earlier in this diff. In its place, a helper for normalizing HTTP header names is added:)

def standardize_header_names(headers):
    """Replace certain header names with standardized spellings.

    Standardizes the spellings of the following header names:
    - Accept-ranges
    - Content-length
    - Content-type
    - Date
    - Last-modified
    - Server

    Every name considered is translated to one of the above names if the only
    difference between the two is how the first letters of each word are
    capitalized; whether words are separated; or, if separated, whether they
    are so by a dash (-), underscore (_), or space ( ).  Header names that
    cannot be mapped as described above are returned unaltered.

    For example: The standard spelling of "Content-length" would be substituted
    for any of the following names:
    - Content-length
    - content_length
    - contentlength
    - content_Length
    - contentLength
    - content Length

    ... and any other header name, such as "Content-encoding", would not be
    altered, regardless of spelling.

    If headers is a string, then it (or an appropriate substitute) is returned.

    If headers is a non-empty tuple, headers[0] is a string, and there exists a
    standardized spelling for header[0] that differs from it, then a new tuple
    is returned.  This tuple has the same elements as headers, except the first
    element is the standardized spelling for headers[0].

    If headers is a sequence, then a new list is considered, where each element
    is its corresponding element in headers, but mapped as above if a string or
    tuple.  This new list is returned if at least one of its elements differs
    from its corresponding element in headers.

    If headers is a mapping, then a new dict is considered, where the key in
    each item is the key of its corresponding item in headers, mapped as above
    if a string or tuple.  The value is taken from the corresponding item.  If
    the keys of multiple items in headers map to the same key after being
    standardized, then the value for the resulting item is undefined.  The new
    dict is returned if at least one of its items has a key that differs from
    that of their corresponding item in headers, or if the keys of multiple
    items in headers map to the same key after being standardized.

    In all other cases headers is returned unaltered.
    """
    if isinstance(headers, six.string_types):
        for standardized_spelling, other_spellings in (
                HTTP_HEADER_NAME_ALIASES.items()):
            if headers in other_spellings:
                if headers == standardized_spelling:
                    return headers
                return standardized_spelling
        return headers

    if isinstance(headers, tuple):
        if not headers:
            return headers
        old = headers[0]
        if isinstance(old, six.string_types):
            new = standardize_header_names(old)
            if old is not new:
                return (new,) + headers[1:]
        return headers

    try:
        changed = False
        new_dict = {}
        for key, value in headers.items():
            if isinstance(key, (tuple, six.string_types)):
                old_key, key = key, standardize_header_names(key)
                changed = changed or key is not old_key

            new_dict[key] = value

        return new_dict if changed else headers
    except (AttributeError, TypeError, ValueError):
        pass

    try:
        changed = False
        new_list = []
        for item in headers:
            if isinstance(item, (tuple, six.string_types)):
                old_item, item = item, standardize_header_names(item)
                changed = changed or item is not old_item

            new_list.append(item)

        return new_list if changed else headers
    except TypeError:
        pass

    return headers
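
Illustrative behavior, following the docstring above:

standardize_header_names('content_length')
# -> 'Content-length'

standardize_header_names({'content-type': 'text/html', 'X-Custom': '1'})
# -> {'Content-type': 'text/html', 'X-Custom': '1'}

standardize_header_names('Content-Encoding')
# -> 'Content-Encoding' (no alias defined, so it is returned unaltered)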
class SpackWebError(spack.error.SpackError): class SpackWebError(spack.error.SpackError):
"""Superclass for Spack web spidering errors.""" """Superclass for Spack web spidering errors."""
class VersionFetchError(SpackWebError):
"""Raised when we can't determine a URL to fetch a package."""
class NoNetworkConnectionError(SpackWebError): class NoNetworkConnectionError(SpackWebError):
"""Raised when an operation can't get an internet connection.""" """Raised when an operation can't get an internet connection."""
def __init__(self, message, url): def __init__(self, message, url):