fetching: S3 upload and download (#11117)

This extends Spack so that it can fetch sources and binaries from, push sources and binaries to, and index the contents of mirrors hosted in an S3 bucket.

High-level to-do list:

- [x] Extend mirrors configuration to add support for `file://` and `s3://` URLs.
- [x] Ensure all fetching, pushing, and indexing operations work for `file://` URLs.
- [x] Implement S3 source fetching
- [x] Implement S3 binary mirror indexing
- [x] Implement S3 binary package fetching
- [x] Implement S3 source pushing
- [x] Implement S3 binary package pushing

Important details:

* Refactor URL handling so that S3 URLs and mirror URLs are handled more gracefully.
  - Updated `parse()` to accept already-parsed URL objects; an equivalent object
    is returned with any extra S3-related attributes intact. Objects created with
    urllib can also be passed, and the additional S3 handling logic is still applied
    (see the sketch below).

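A minimal sketch of the updated `parse()` behavior (the bucket name is hypothetical):

```python
import six.moves.urllib.parse as urllib_parse
import spack.util.url as url_util

# Strings and already-parsed URL objects are both accepted.
parsed = url_util.parse('s3://my-bucket/build_cache')
same = url_util.parse(parsed)  # an equivalent object is returned

# Objects produced directly by urllib can be passed too; the extra
# S3-related handling is still applied.
via_urllib = url_util.parse(urllib_parse.urlparse('s3://my-bucket/build_cache'))
```
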
* Update the mirror schema/parsing so that a mirror can have separate fetch/push URLs (see the sketch below).
* Implement `S3FetchStrategy` and several supporting utility changes.
* Provide more feature-complete S3 fetching.
* Update the `spack buildcache create` command to support S3.

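A minimal sketch of the two mirror forms the updated schema accepts, using `Mirror.from_dict()` from this change (the names and URLs are hypothetical):

```python
import spack.mirror

# Single-URL form: fetching and pushing use the same location.
simple = spack.mirror.Mirror.from_dict('s3://my-bucket/spack', name='cloud')

# Two-URL form: fetch from a read-only HTTP front end, push to S3.
split = spack.mirror.Mirror.from_dict(
    {'fetch': 'https://mirror.example.com/spack',
     'push': 's3://my-bucket/spack'},
    name='cloud')

assert simple.fetch_url == simple.push_url
assert split.fetch_url != split.push_url
```
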
* Move the core logic for reading data from S3 out of the S3 fetch strategy and into
  the S3 URL handler. The S3 fetch strategy now calls into `read_from_url()`. Since
  `read_from_url()` can now handle S3 URLs, the S3 fetch strategy is arguably redundant;
  it is not yet clear whether the ideal design is to keep S3 fetching in a fetch
  strategy, implement it directly in `read_from_url()`, or both (see the sketch below).

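A minimal sketch of reading an S3 object through `read_from_url()` after this change (bucket and key are hypothetical):

```python
import codecs

import spack.util.web as web_util

# read_from_url() now returns (url, headers, stream) and understands s3:// URLs.
_, headers, stream = web_util.read_from_url('s3://my-bucket/build_cache/index.html')
contents = codecs.getreader('utf-8')(stream).read()
print(headers.get('Content-type'), len(contents))
```
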
* Expanded what can be passed to `spack buildcache` via the `-d` flag: in addition
  to a directory on the local filesystem, the name of a configured mirror or a push
  URL can be passed directly (see the sketch below).
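
A minimal sketch of how the `-d` argument is resolved to a push URL via `MirrorCollection.lookup()` (the mirror name and URL are hypothetical):

```python
import spack.mirror
import spack.util.url as url_util

mirrors = spack.mirror.MirrorCollection()

# A configured mirror name resolves to that mirror's push URL...
outdir = url_util.format(mirrors.lookup('my-mirror').push_url)

# ...while anything else is treated as an anonymous mirror URL and used as-is.
outdir = url_util.format(mirrors.lookup('s3://my-bucket/spack').push_url)
```
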
Omar Padron 2019-10-22 03:32:04 -04:00 committed by Todd Gamblin
parent 6cb972a9d2
commit fd58c98b0e
21 changed files with 1411 additions and 280 deletions

View file

@ -1,3 +1,8 @@
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
generate ci jobs:
script:
- "./bin/generate-gitlab-ci-yml.sh"

View file

@ -10,4 +10,4 @@ set -x
SPACK_BIN_DIR="${CI_PROJECT_DIR}/bin"
export PATH="${SPACK_BIN_DIR}:${PATH}"
spack upload-s3 index
spack buildcache update-index -d "$MIRROR_URL"

View file

@ -3,6 +3,7 @@
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import codecs
import os
import re
import tarfile
@ -23,14 +24,32 @@
import spack.util.gpg as gpg_util
import spack.relocate as relocate
import spack.util.spack_yaml as syaml
import spack.mirror
import spack.util.url as url_util
import spack.util.web as web_util
from spack.spec import Spec
from spack.stage import Stage
from spack.util.gpg import Gpg
from spack.util.web import spider, read_from_url
from spack.util.executable import ProcessError
_build_cache_relative_path = 'build_cache'
BUILD_CACHE_INDEX_TEMPLATE = '''
<html>
<head>
<title>{title}</title>
</head>
<body>
<ul>
{path_list}
</ul>
</body>
</html>
'''
BUILD_CACHE_INDEX_ENTRY_TEMPLATE = ' <li><a href="{path}">{path}</a></li>'
class NoOverwriteException(Exception):
"""
@ -101,7 +120,7 @@ def build_cache_relative_path():
return _build_cache_relative_path
def build_cache_directory(prefix):
def build_cache_prefix(prefix):
return os.path.join(prefix, build_cache_relative_path())
@ -246,29 +265,36 @@ def sign_tarball(key, force, specfile_path):
Gpg.sign(key, specfile_path, '%s.asc' % specfile_path)
def _generate_html_index(path_list, output_path):
f = open(output_path, 'w')
header = """<html>\n
<head>\n</head>\n
<list>\n"""
footer = "</list>\n</html>\n"
f.write(header)
for path in path_list:
rel = os.path.basename(path)
f.write('<li><a href="%s"> %s</a>\n' % (rel, rel))
f.write(footer)
f.close()
def generate_package_index(cache_prefix):
"""Create the build cache index page.
Creates (or replaces) the "index.html" page at the location given in
cache_prefix. This page contains a link for each binary package (*.yaml)
and signing key (*.key) under cache_prefix.
"""
tmpdir = tempfile.mkdtemp()
try:
index_html_path = os.path.join(tmpdir, 'index.html')
file_list = (
entry
for entry in web_util.list_url(cache_prefix)
if (entry.endswith('.yaml')
or entry.endswith('.key')))
def generate_package_index(build_cache_dir):
yaml_list = os.listdir(build_cache_dir)
path_list = [os.path.join(build_cache_dir, l) for l in yaml_list]
with open(index_html_path, 'w') as f:
f.write(BUILD_CACHE_INDEX_TEMPLATE.format(
title='Spack Package Index',
path_list='\n'.join(
BUILD_CACHE_INDEX_ENTRY_TEMPLATE.format(path=path)
for path in file_list)))
index_html_path_tmp = os.path.join(build_cache_dir, 'index.html.tmp')
index_html_path = os.path.join(build_cache_dir, 'index.html')
_generate_html_index(path_list, index_html_path_tmp)
shutil.move(index_html_path_tmp, index_html_path)
web_util.push_to_url(
index_html_path,
url_util.join(cache_prefix, 'index.html'),
keep_original=False,
extra_args={'ContentType': 'text/html'})
finally:
shutil.rmtree(tmpdir)
def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
@ -281,33 +307,41 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
raise ValueError('spec must be concrete to build tarball')
# set up some paths
build_cache_dir = build_cache_directory(outdir)
tmpdir = tempfile.mkdtemp()
cache_prefix = build_cache_prefix(tmpdir)
tarfile_name = tarball_name(spec, '.tar.gz')
tarfile_dir = os.path.join(build_cache_dir,
tarball_directory_name(spec))
tarfile_dir = os.path.join(cache_prefix, tarball_directory_name(spec))
tarfile_path = os.path.join(tarfile_dir, tarfile_name)
mkdirp(tarfile_dir)
spackfile_path = os.path.join(
build_cache_dir, tarball_path_name(spec, '.spack'))
if os.path.exists(spackfile_path):
cache_prefix, tarball_path_name(spec, '.spack'))
remote_spackfile_path = url_util.join(
outdir, os.path.relpath(spackfile_path, tmpdir))
mkdirp(tarfile_dir)
if web_util.url_exists(remote_spackfile_path):
if force:
os.remove(spackfile_path)
web_util.remove_url(remote_spackfile_path)
else:
raise NoOverwriteException(str(spackfile_path))
raise NoOverwriteException(url_util.format(remote_spackfile_path))
# need to copy the spec file so the build cache can be downloaded
# without concretizing with the current spack packages
# and preferences
spec_file = os.path.join(spec.prefix, ".spack", "spec.yaml")
specfile_name = tarball_name(spec, '.spec.yaml')
specfile_path = os.path.realpath(
os.path.join(build_cache_dir, specfile_name))
os.path.join(cache_prefix, specfile_name))
if os.path.exists(specfile_path):
remote_specfile_path = url_util.join(
outdir, os.path.relpath(specfile_path, os.path.realpath(tmpdir)))
if web_util.url_exists(remote_specfile_path):
if force:
os.remove(specfile_path)
web_util.remove_url(remote_specfile_path)
else:
raise NoOverwriteException(str(specfile_path))
raise NoOverwriteException(url_util.format(remote_specfile_path))
# make a copy of the install directory to work with
workdir = os.path.join(tempfile.mkdtemp(), os.path.basename(spec.prefix))
@ -324,6 +358,7 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
except Exception as e:
shutil.rmtree(workdir)
shutil.rmtree(tarfile_dir)
shutil.rmtree(tmpdir)
tty.die(e)
else:
try:
@ -331,7 +366,9 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
except Exception as e:
shutil.rmtree(workdir)
shutil.rmtree(tarfile_dir)
shutil.rmtree(tmpdir)
tty.die(e)
# create compressed tarball of the install prefix
with closing(tarfile.open(tarfile_path, 'w:gz')) as tar:
tar.add(name='%s' % workdir,
@ -360,7 +397,9 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
spec_dict['full_hash'] = spec.full_hash()
tty.debug('The full_hash ({0}) of {1} will be written into {2}'.format(
spec_dict['full_hash'], spec.name, specfile_path))
spec_dict['full_hash'],
spec.name,
url_util.format(remote_specfile_path)))
tty.debug(spec.tree())
with open(specfile_path, 'w') as outfile:
@ -382,9 +421,19 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
if not unsigned:
os.remove('%s.asc' % specfile_path)
# create an index.html for the build_cache directory so specs can be found
if regenerate_index:
generate_package_index(build_cache_dir)
web_util.push_to_url(
spackfile_path, remote_spackfile_path, keep_original=False)
web_util.push_to_url(
specfile_path, remote_specfile_path, keep_original=False)
try:
# create an index.html for the build_cache directory so specs can be
# found
if regenerate_index:
generate_package_index(url_util.join(
outdir, os.path.relpath(cache_prefix, tmpdir)))
finally:
shutil.rmtree(tmpdir)
return None
@ -394,13 +443,16 @@ def download_tarball(spec):
Download binary tarball for given package into stage area
Return True if successful
"""
mirrors = spack.config.get('mirrors')
if len(mirrors) == 0:
if not spack.mirror.MirrorCollection():
tty.die("Please add a spack mirror to allow " +
"download of pre-compiled packages.")
tarball = tarball_path_name(spec, '.spack')
for mirror_name, mirror_url in mirrors.items():
url = mirror_url + '/' + _build_cache_relative_path + '/' + tarball
for mirror in spack.mirror.MirrorCollection().values():
url = url_util.join(
mirror.fetch_url, _build_cache_relative_path, tarball)
# stage the tarball into standard place
stage = Stage(url, name="build_cache", keep=True)
try:
@ -408,6 +460,7 @@ def download_tarball(spec):
return stage.save_filename
except fs.FetchError:
continue
return None
@ -610,26 +663,29 @@ def get_specs(force=False):
tty.debug("Using previously-retrieved specs")
return _cached_specs
mirrors = spack.config.get('mirrors')
if len(mirrors) == 0:
tty.debug("No Spack mirrors are currently configured")
if not spack.mirror.MirrorCollection():
tty.warn("No Spack mirrors are currently configured")
return {}
urls = set()
for mirror_name, mirror_url in mirrors.items():
if mirror_url.startswith('file'):
mirror = mirror_url.replace(
'file://', '') + "/" + _build_cache_relative_path
tty.msg("Finding buildcaches in %s" % mirror)
if os.path.exists(mirror):
files = os.listdir(mirror)
for mirror in spack.mirror.MirrorCollection().values():
fetch_url_build_cache = url_util.join(
mirror.fetch_url, _build_cache_relative_path)
mirror_dir = url_util.local_file_path(fetch_url_build_cache)
if mirror_dir:
tty.msg("Finding buildcaches in %s" % mirror_dir)
if os.path.exists(mirror_dir):
files = os.listdir(mirror_dir)
for file in files:
if re.search('spec.yaml', file):
link = 'file://' + mirror + '/' + file
link = url_util.join(fetch_url_build_cache, file)
urls.add(link)
else:
tty.msg("Finding buildcaches on %s" % mirror_url)
p, links = spider(mirror_url + "/" + _build_cache_relative_path)
tty.msg("Finding buildcaches at %s" %
url_util.format(fetch_url_build_cache))
p, links = web_util.spider(
url_util.join(fetch_url_build_cache, 'index.html'))
for link in links:
if re.search("spec.yaml", link):
urls.add(link)
@ -659,28 +715,33 @@ def get_keys(install=False, trust=False, force=False):
"""
Get pgp public keys available on mirror
"""
mirrors = spack.config.get('mirrors')
if len(mirrors) == 0:
if not spack.mirror.MirrorCollection():
tty.die("Please add a spack mirror to allow " +
"download of build caches.")
keys = set()
for mirror_name, mirror_url in mirrors.items():
if mirror_url.startswith('file'):
mirror = os.path.join(
mirror_url.replace('file://', ''), _build_cache_relative_path)
tty.msg("Finding public keys in %s" % mirror)
files = os.listdir(mirror)
for mirror in spack.mirror.MirrorCollection().values():
fetch_url_build_cache = url_util.join(
mirror.fetch_url, _build_cache_relative_path)
mirror_dir = url_util.local_file_path(fetch_url_build_cache)
if mirror_dir:
tty.msg("Finding public keys in %s" % mirror_dir)
files = os.listdir(mirror_dir)
for file in files:
if re.search(r'\.key', file):
link = 'file://' + mirror + '/' + file
link = url_util.join(fetch_url_build_cache, file)
keys.add(link)
else:
tty.msg("Finding public keys on %s" % mirror_url)
p, links = spider(mirror_url + "/build_cache", depth=1)
tty.msg("Finding public keys at %s" %
url_util.format(fetch_url_build_cache))
p, links = web_util.spider(fetch_url_build_cache, depth=1)
for link in links:
if re.search(r'\.key', link):
keys.add(link)
for link in keys:
with Stage(link, name="build_cache", keep=True) as stage:
if os.path.exists(stage.save_filename) and force:
@ -717,15 +778,16 @@ def needs_rebuild(spec, mirror_url, rebuild_on_errors=False):
# Try to retrieve the .spec.yaml directly, based on the known
# format of the name, in order to determine if the package
# needs to be rebuilt.
build_cache_dir = build_cache_directory(mirror_url)
cache_prefix = build_cache_prefix(mirror_url)
spec_yaml_file_name = tarball_name(spec, '.spec.yaml')
file_path = os.path.join(build_cache_dir, spec_yaml_file_name)
file_path = os.path.join(cache_prefix, spec_yaml_file_name)
result_of_error = 'Package ({0}) will {1}be rebuilt'.format(
spec.short_spec, '' if rebuild_on_errors else 'not ')
try:
yaml_contents = read_from_url(file_path)
_, _, yaml_file = web_util.read_from_url(file_path)
yaml_contents = codecs.getreader('utf-8')(yaml_file).read()
except URLError as url_err:
err_msg = [
'Unable to determine whether {0} needs rebuilding,',
@ -782,22 +844,22 @@ def check_specs_against_mirrors(mirrors, specs, output_file=None,
"""
rebuilds = {}
for mirror_name, mirror_url in mirrors.items():
tty.msg('Checking for built specs at %s' % mirror_url)
for mirror in spack.mirror.MirrorCollection(mirrors).values():
tty.msg('Checking for built specs at %s' % mirror.fetch_url)
rebuild_list = []
for spec in specs:
if needs_rebuild(spec, mirror_url, rebuild_on_errors):
if needs_rebuild(spec, mirror.fetch_url, rebuild_on_errors):
rebuild_list.append({
'short_spec': spec.short_spec,
'hash': spec.dag_hash()
})
if rebuild_list:
rebuilds[mirror_url] = {
'mirrorName': mirror_name,
'mirrorUrl': mirror_url,
rebuilds[mirror.fetch_url] = {
'mirrorName': mirror.name,
'mirrorUrl': mirror.fetch_url,
'rebuildSpecs': rebuild_list
}
@ -810,33 +872,36 @@ def check_specs_against_mirrors(mirrors, specs, output_file=None,
def _download_buildcache_entry(mirror_root, descriptions):
for description in descriptions:
url = os.path.join(mirror_root, description['url'])
description_url = os.path.join(mirror_root, description['url'])
path = description['path']
fail_if_missing = description['required']
mkdirp(path)
stage = Stage(url, name="build_cache", path=path, keep=True)
stage = Stage(
description_url, name="build_cache", path=path, keep=True)
try:
stage.fetch()
except fs.FetchError as e:
tty.debug(e)
if fail_if_missing:
tty.error('Failed to download required url {0}'.format(url))
tty.error('Failed to download required url {0}'.format(
description_url))
return False
return True
def download_buildcache_entry(file_descriptions):
mirrors = spack.config.get('mirrors')
if len(mirrors) == 0:
if not spack.mirror.MirrorCollection():
tty.die("Please add a spack mirror to allow " +
"download of buildcache entries.")
for mirror_name, mirror_url in mirrors.items():
mirror_root = os.path.join(mirror_url, _build_cache_relative_path)
for mirror in spack.mirror.MirrorCollection().values():
mirror_root = os.path.join(
mirror.fetch_url,
_build_cache_relative_path)
if _download_buildcache_entry(mirror_root, file_descriptions):
return True

View file

@ -9,11 +9,13 @@
import llnl.util.lang
from llnl.util.filesystem import mkdirp
import spack.error
import spack.paths
import spack.config
import spack.fetch_strategy
import spack.util.file_cache
from spack.util.path import canonicalize_path
import spack.util.path
import spack.util.url as url_util
def _misc_cache():
@ -25,7 +27,7 @@ def _misc_cache():
path = spack.config.get('config:misc_cache')
if not path:
path = os.path.join(spack.paths.user_config_path, 'cache')
path = canonicalize_path(path)
path = spack.util.path.canonicalize_path(path)
return spack.util.file_cache.FileCache(path)
@ -43,22 +45,26 @@ def _fetch_cache():
path = spack.config.get('config:source_cache')
if not path:
path = os.path.join(spack.paths.var_path, "cache")
path = canonicalize_path(path)
path = spack.util.path.canonicalize_path(path)
return spack.fetch_strategy.FsCache(path)
class MirrorCache(object):
def __init__(self, root):
self.root = os.path.abspath(root)
self.root = url_util.local_file_path(root)
if not self.root:
raise spack.error.SpackError(
'MirrorCaches only work with file:// URLs')
self.new_resources = set()
self.existing_resources = set()
def store(self, fetcher, relative_dest):
# Note this will archive package sources even if they would not
# normally be cached (e.g. the current tip of an hg/git branch)
dst = os.path.join(self.root, relative_dest)
if os.path.exists(dst):
self.existing_resources.add(relative_dest)
else:

View file

@ -14,6 +14,7 @@
import spack.cmd.common.arguments as arguments
import spack.environment as ev
import spack.hash_types as ht
import spack.mirror
import spack.relocate
import spack.repo
import spack.spec
@ -21,6 +22,8 @@
import spack.config
import spack.repo
import spack.store
import spack.util.url as url_util
from spack.error import SpecError
from spack.spec import Spec, save_dependency_spec_yamls
@ -205,6 +208,13 @@ def setup_parser(subparser):
help='Destination mirror url')
copy.set_defaults(func=buildcache_copy)
# Update buildcache index without copying any additional packages
update_index = subparsers.add_parser(
'update-index', help=buildcache_update_index.__doc__)
update_index.add_argument(
'-d', '--mirror-url', default=None, help='Destination mirror url')
update_index.set_defaults(func=buildcache_update_index)
def find_matching_specs(pkgs, allow_multiple_matches=False, env=None):
"""Returns a list of specs matching the not necessarily
@ -312,9 +322,14 @@ def createtarball(args):
" yaml file containing a spec to install")
pkgs = set(packages)
specs = set()
outdir = '.'
if args.directory:
outdir = args.directory
mirror = spack.mirror.MirrorCollection().lookup(outdir)
outdir = url_util.format(mirror.push_url)
signkey = None
if args.key:
signkey = args.key
@ -649,6 +664,19 @@ def buildcache_copy(args):
shutil.copyfile(cdashid_src_path, cdashid_dest_path)
def buildcache_update_index(args):
"""Update a buildcache index."""
outdir = '.'
if args.mirror_url:
outdir = args.mirror_url
mirror = spack.mirror.MirrorCollection().lookup(outdir)
outdir = url_util.format(mirror.push_url)
bindist.generate_package_index(
url_util.join(outdir, bindist.build_cache_relative_path()))
def buildcache(parser, args):
if args.func:
args.func(args)

View file

@ -11,8 +11,8 @@
import spack.cmd
import spack.repo
import spack.stage
import spack.util.crypto
import spack.util.web
from spack.util.naming import valid_fully_qualified_module_name
from spack.version import ver, Version
@ -56,7 +56,7 @@ def checksum(parser, args):
if not url_dict:
tty.die("Could not find any versions for {0}".format(pkg.name))
version_lines = spack.util.web.get_checksums_for_versions(
version_lines = spack.stage.get_checksums_for_versions(
url_dict, pkg.name, keep_stage=args.keep_stage)
print()

View file

@ -13,6 +13,7 @@
import spack.util.web
import spack.repo
import spack.stage
from spack.spec import Spec
from spack.util.editor import editor
from spack.util.executable import which, ProcessError
@ -618,7 +619,7 @@ def get_versions(args, name):
version = parse_version(args.url)
url_dict = {version: args.url}
versions = spack.util.web.get_checksums_for_versions(
versions = spack.stage.get_checksums_for_versions(
url_dict, name, first_stage_function=guesser,
keep_stage=args.keep_stage)
else:

View file

@ -4,20 +4,21 @@
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import sys
import os
from datetime import datetime
import argparse
import llnl.util.tty as tty
from llnl.util.tty.colify import colify
import spack.cmd
import spack.cmd.common.arguments as arguments
import spack.concretize
import spack.config
import spack.environment as ev
import spack.mirror
import spack.repo
import spack.cmd.common.arguments as arguments
import spack.environment as ev
import spack.util.url as url_util
import spack.util.web as web_util
from spack.spec import Spec
from spack.error import SpackError
from spack.util.spack_yaml import syaml_dict
@ -73,6 +74,19 @@ def setup_parser(subparser):
default=spack.config.default_modify_scope(),
help="configuration scope to modify")
# Set-Url
set_url_parser = sp.add_parser('set-url', help=mirror_set_url.__doc__)
set_url_parser.add_argument('name', help="mnemonic name for mirror")
set_url_parser.add_argument(
'url', help="url of mirror directory from 'spack mirror create'")
set_url_parser.add_argument(
'--push', action='store_true',
help="set only the URL used for uploading new packages")
set_url_parser.add_argument(
'--scope', choices=scopes, metavar=scopes_metavar,
default=spack.config.default_modify_scope(),
help="configuration scope to modify")
# List
list_parser = sp.add_parser('list', help=mirror_list.__doc__)
list_parser.add_argument(
@ -83,20 +97,14 @@ def setup_parser(subparser):
def mirror_add(args):
"""Add a mirror to Spack."""
url = args.url
if url.startswith('/'):
url = 'file://' + url
url = url_util.format(args.url)
mirrors = spack.config.get('mirrors', scope=args.scope)
if not mirrors:
mirrors = syaml_dict()
for name, u in mirrors.items():
if name == args.name:
tty.die("Mirror with name %s already exists." % name)
if u == url:
tty.die("Mirror with url %s already exists." % url)
# should only be one item per mirror dict.
if args.name in mirrors:
tty.die("Mirror with name %s already exists." % args.name)
items = [(n, u) for n, u in mirrors.items()]
items.insert(0, (args.name, url))
@ -117,21 +125,86 @@ def mirror_remove(args):
old_value = mirrors.pop(name)
spack.config.set('mirrors', mirrors, scope=args.scope)
tty.msg("Removed mirror %s with url %s" % (name, old_value))
debug_msg_url = "url %s"
debug_msg = ["Removed mirror %s with"]
values = [name]
try:
fetch_value = old_value['fetch']
push_value = old_value['push']
debug_msg.extend(("fetch", debug_msg_url, "and push", debug_msg_url))
values.extend((fetch_value, push_value))
except TypeError:
debug_msg.append(debug_msg_url)
values.append(old_value)
tty.debug(" ".join(debug_msg) % tuple(values))
tty.msg("Removed mirror %s." % name)
def mirror_set_url(args):
"""Change the URL of a mirror."""
url = url_util.format(args.url)
mirrors = spack.config.get('mirrors', scope=args.scope)
if not mirrors:
mirrors = syaml_dict()
if args.name not in mirrors:
tty.die("No mirror found with name %s." % args.name)
entry = mirrors[args.name]
try:
fetch_url = entry['fetch']
push_url = entry['push']
except TypeError:
fetch_url, push_url = entry, entry
changes_made = False
if args.push:
changes_made = changes_made or push_url != url
push_url = url
else:
changes_made = (
changes_made or fetch_url != push_url or push_url != url)
fetch_url, push_url = url, url
items = [
(
(n, u)
if n != args.name else (
(n, {"fetch": fetch_url, "push": push_url})
if fetch_url != push_url else (n, fetch_url)
)
)
for n, u in mirrors.items()
]
mirrors = syaml_dict(items)
spack.config.set('mirrors', mirrors, scope=args.scope)
if changes_made:
tty.msg(
"Changed%s url for mirror %s." %
((" (push)" if args.push else ""), args.name))
else:
tty.msg("Url already set for mirror %s." % args.name)
def mirror_list(args):
"""Print out available mirrors to the console."""
mirrors = spack.config.get('mirrors', scope=args.scope)
mirrors = spack.mirror.MirrorCollection(scope=args.scope)
if not mirrors:
tty.msg("No mirrors configured.")
return
max_len = max(len(n) for n in mirrors.keys())
fmt = "%%-%ds%%s" % (max_len + 4)
for name in mirrors:
print(fmt % (name, mirrors[name]))
mirrors.display()
def _read_specs_from_file(filename):
@ -188,14 +261,13 @@ def mirror_create(args):
msg = 'Skipping {0} as it is an external spec.'
tty.msg(msg.format(spec.cshort_spec))
# Default name for directory is spack-mirror-<DATESTAMP>
directory = args.directory
if not directory:
timestamp = datetime.now().strftime("%Y-%m-%d")
directory = 'spack-mirror-' + timestamp
mirror = spack.mirror.Mirror(
args.directory or spack.config.get('config:source_cache'))
directory = url_util.format(mirror.push_url)
# Make sure nothing is in the way.
existed = os.path.isdir(directory)
existed = web_util.url_exists(directory)
# Actually do the work to create the mirror
present, mirrored, error = spack.mirror.create(
@ -220,6 +292,7 @@ def mirror(parser, args):
'add': mirror_add,
'remove': mirror_remove,
'rm': mirror_remove,
'set-url': mirror_set_url,
'list': mirror_list}
if args.no_checksum:

View file

@ -5,10 +5,8 @@
from __future__ import division, print_function
from collections import defaultdict
try:
from urllib.parse import urlparse
except ImportError:
from urlparse import urlparse
import six.moves.urllib.parse as urllib_parse
import spack.fetch_strategy as fs
import spack.repo
@ -262,7 +260,7 @@ def add(self, fetcher):
self.checksums[algo] += 1
# parse out the URL scheme (https/http/ftp/etc.)
urlinfo = urlparse(fetcher.url)
urlinfo = urllib_parse.urlparse(fetcher.url)
self.schemes[urlinfo.scheme] += 1
elif url_type == 'git':

View file

@ -23,6 +23,7 @@
Archive a source directory, e.g. for creating a mirror.
"""
import os
import os.path
import sys
import re
import shutil
@ -30,6 +31,7 @@
import xml.etree.ElementTree
from functools import wraps
from six import string_types, with_metaclass
import six.moves.urllib.parse as urllib_parse
import llnl.util.tty as tty
from llnl.util.filesystem import (
@ -39,6 +41,9 @@
import spack.error
import spack.util.crypto as crypto
import spack.util.pattern as pattern
import spack.util.web as web_util
import spack.util.url as url_util
from spack.util.executable import which
from spack.util.string import comma_and, quote
from spack.version import Version, ver
@ -48,6 +53,17 @@
#: List of all fetch strategies, created by FetchStrategy metaclass.
all_strategies = []
CONTENT_TYPE_MISMATCH_WARNING_TEMPLATE = (
"The contents of {subject} look like {content_type}. Either the URL"
" you are trying to use does not exist or you have an internet gateway"
" issue. You can remove the bad archive using 'spack clean"
" <package>', then try again using the correct URL.")
def warn_content_type_mismatch(subject, content_type='HTML'):
tty.warn(CONTENT_TYPE_MISMATCH_WARNING_TEMPLATE.format(
subject=subject, content_type=content_type))
def _needs_stage(fun):
"""Many methods on fetch strategies require a stage to be set
@ -351,12 +367,7 @@ def fetch(self):
content_types = re.findall(r'Content-Type:[^\r\n]+', headers,
flags=re.IGNORECASE)
if content_types and 'text/html' in content_types[-1]:
msg = ("The contents of {0} look like HTML. Either the URL "
"you are trying to use does not exist or you have an "
"internet gateway issue. You can remove the bad archive "
"using 'spack clean <package>', then try again using "
"the correct URL.")
tty.warn(msg.format(self.archive_file or "the archive"))
warn_content_type_mismatch(self.archive_file or "the archive")
if save_file:
os.rename(partial_file, save_file)
@ -449,7 +460,10 @@ def archive(self, destination):
if not self.archive_file:
raise NoArchiveFileError("Cannot call archive() before fetching.")
shutil.copyfile(self.archive_file, destination)
web_util.push_to_url(
self.archive_file,
destination,
keep_original=True)
@_needs_stage
def check(self):
@ -1063,6 +1077,54 @@ def __str__(self):
return "[hg] %s" % self.url
class S3FetchStrategy(URLFetchStrategy):
"""FetchStrategy that pulls from an S3 bucket."""
enabled = True
url_attr = 's3'
def __init__(self, *args, **kwargs):
try:
super(S3FetchStrategy, self).__init__(*args, **kwargs)
except ValueError:
if not kwargs.get('url'):
raise ValueError(
"S3FetchStrategy requires a url for fetching.")
@_needs_stage
def fetch(self):
if self.archive_file:
tty.msg("Already downloaded %s" % self.archive_file)
return
parsed_url = url_util.parse(self.url)
if parsed_url.scheme != 's3':
raise ValueError(
'S3FetchStrategy can only fetch from s3:// urls.')
tty.msg("Fetching %s" % self.url)
basename = os.path.basename(parsed_url.path)
with working_dir(self.stage.path):
_, headers, stream = web_util.read_from_url(self.url)
with open(basename, 'wb') as f:
shutil.copyfileobj(stream, f)
content_type = headers['Content-type']
if content_type == 'text/html':
warn_content_type_mismatch(self.archive_file or "the archive")
if self.stage.save_filename:
os.rename(
os.path.join(self.stage.path, basename),
self.stage.save_filename)
if not self.archive_file:
raise FailedDownloadError(self.url)
def from_url(url):
"""Given a URL, find an appropriate fetch strategy for it.
Currently just gives you a URLFetchStrategy that uses curl.
@ -1206,6 +1268,34 @@ def for_package_version(pkg, version):
raise InvalidArgsError(pkg, version, **args)
def from_url_scheme(url, *args, **kwargs):
"""Finds a suitable FetchStrategy by matching its url_attr with the scheme
in the given url."""
url = kwargs.get('url', url)
parsed_url = urllib_parse.urlparse(url, scheme='file')
scheme_mapping = (
kwargs.get('scheme_mapping') or
{
'file': 'url',
'http': 'url',
'https': 'url'
})
scheme = parsed_url.scheme
scheme = scheme_mapping.get(scheme, scheme)
for fetcher in all_strategies:
url_attr = getattr(fetcher, 'url_attr', None)
if url_attr and url_attr == scheme:
return fetcher(url, *args, **kwargs)
raise ValueError(
'No FetchStrategy found for url with scheme: "{SCHEME}"'.format(
SCHEME=parsed_url.scheme))
def from_list_url(pkg):
"""If a package provides a URL which lists URLs for resources by
version, this can create a fetcher for a URL discovered for

View file

@ -13,6 +13,18 @@
"""
import sys
import os
import os.path
import operator
import six
import ruamel.yaml.error as yaml_error
try:
from collections.abc import Mapping
except ImportError:
from collections import Mapping
import llnl.util.tty as tty
from llnl.util.filesystem import mkdirp
@ -20,9 +32,205 @@
import spack.error
import spack.url as url
import spack.fetch_strategy as fs
from spack.spec import Spec
import spack.util.spack_json as sjson
import spack.util.spack_yaml as syaml
import spack.util.url as url_util
import spack.spec
from spack.version import VersionList
from spack.util.compression import allowed_archive
from spack.util.spack_yaml import syaml_dict
def _display_mirror_entry(size, name, url, type_=None):
if type_:
type_ = "".join((" (", type_, ")"))
else:
type_ = ""
print("%-*s%s%s" % (size + 4, name, url, type_))
class Mirror(object):
"""Represents a named location for storing source tarballs and binary
packages.
Mirrors have a fetch_url that indicates where and how artifacts are fetched
from them, and a push_url that indicates where and how artifacts are pushed
to them. These two URLs are usually the same.
"""
def __init__(self, fetch_url, push_url=None, name=None):
self._fetch_url = fetch_url
self._push_url = push_url
self._name = name
def to_json(self, stream=None):
return sjson.dump(self.to_dict(), stream)
def to_yaml(self, stream=None):
return syaml.dump(self.to_dict(), stream)
@staticmethod
def from_yaml(stream, name=None):
try:
data = syaml.load(stream)
return Mirror.from_dict(data, name)
except yaml_error.MarkedYAMLError as e:
raise syaml.SpackYAMLError("error parsing YAML spec:", str(e))
@staticmethod
def from_json(stream, name=None):
d = sjson.load(stream)
return Mirror.from_dict(d, name)
def to_dict(self):
if self._push_url is None:
return self._fetch_url
else:
return syaml_dict([
('fetch', self._fetch_url),
('push', self._push_url)])
@staticmethod
def from_dict(d, name=None):
if isinstance(d, six.string_types):
return Mirror(d, name=name)
else:
return Mirror(d['fetch'], d['push'], name)
def display(self, max_len=0):
if self._push_url is None:
_display_mirror_entry(max_len, self._name, self._fetch_url)
else:
_display_mirror_entry(
max_len, self._name, self._fetch_url, "fetch")
_display_mirror_entry(
max_len, self._name, self._push_url, "push")
def __str__(self):
name = self._name
if name is None:
name = ''
else:
name = ' "%s"' % name
if self._push_url is None:
return "[Mirror%s (%s)]" % (name, self._fetch_url)
return "[Mirror%s (fetch: %s, push: %s)]" % (
name, self._fetch_url, self._push_url)
def __repr__(self):
return ''.join((
'Mirror(',
', '.join(
'%s=%s' % (k, repr(v))
for k, v in (
('fetch_url', self._fetch_url),
('push_url', self._push_url),
('name', self._name))
if k == 'fetch_url' or v),
')'
))
@property
def name(self):
return self._name or "<unnamed>"
@property
def fetch_url(self):
return self._fetch_url
@fetch_url.setter
def fetch_url(self, url):
self._fetch_url = url
self._normalize()
@property
def push_url(self):
if self._push_url is None:
return self._fetch_url
return self._push_url
@push_url.setter
def push_url(self, url):
self._push_url = url
self._normalize()
def _normalize(self):
if self._push_url is not None and self._push_url == self._fetch_url:
self._push_url = None
class MirrorCollection(Mapping):
"""A mapping of mirror names to mirrors."""
def __init__(self, mirrors=None, scope=None):
self._mirrors = dict(
(name, Mirror.from_dict(mirror, name))
for name, mirror in (
mirrors.items() if mirrors is not None else
spack.config.get('mirrors', scope=scope).items()))
def to_json(self, stream=None):
return sjson.dump(self.to_dict(True), stream)
def to_yaml(self, stream=None):
return syaml.dump(self.to_dict(True), stream)
@staticmethod
def from_yaml(stream, name=None):
try:
data = syaml.load(stream)
return MirrorCollection(data)
except yaml_error.MarkedYAMLError as e:
raise syaml.SpackYAMLError("error parsing YAML spec:", str(e))
@staticmethod
def from_json(stream, name=None):
d = sjson.load(stream)
return MirrorCollection(d)
def to_dict(self, recursive=False):
return syaml_dict(sorted(
(
(k, (v.to_dict() if recursive else v))
for (k, v) in self._mirrors.items()
), key=operator.itemgetter(0)
))
@staticmethod
def from_dict(d):
return MirrorCollection(d)
def __getitem__(self, item):
return self._mirrors[item]
def display(self):
max_len = max(len(mirror.name) for mirror in self._mirrors.values())
for mirror in self._mirrors.values():
mirror.display(max_len)
def lookup(self, name_or_url):
"""Looks up and returns a Mirror.
If this MirrorCollection contains a named Mirror under the name
[name_or_url], then that mirror is returned. Otherwise, [name_or_url]
is assumed to be a mirror URL, and an anonymous mirror with the given
URL is returned.
"""
result = self.get(name_or_url)
if result is None:
result = Mirror(fetch_url=name_or_url)
return result
def __iter__(self):
return iter(self._mirrors)
def __len__(self):
return len(self._mirrors)
def mirror_archive_filename(spec, fetcher, resource_id=None):
@ -114,7 +322,7 @@ def get_matching_versions(specs, **kwargs):
# Generate only versions that satisfy the spec.
if spec.concrete or v.satisfies(spec.versions):
s = Spec(pkg.name)
s = spack.spec.Spec(pkg.name)
s.versions = VersionList([v])
s.variants = spec.variants.copy()
# This is needed to avoid hanging references during the
@ -166,12 +374,17 @@ def create(path, specs, **kwargs):
it creates specs for those versions. If the version satisfies any spec
in the specs list, it is downloaded and added to the mirror.
"""
parsed = url_util.parse(path)
mirror_root = url_util.local_file_path(parsed)
# Make sure nothing is in the way.
if os.path.isfile(path):
raise MirrorError("%s already exists and is a file." % path)
if mirror_root and os.path.isfile(mirror_root):
raise MirrorError("%s already exists and is a file." % mirror_root)
# automatically spec-ify anything in the specs array.
specs = [s if isinstance(s, Spec) else Spec(s) for s in specs]
specs = [
s if isinstance(s, spack.spec.Spec) else spack.spec.Spec(s)
for s in specs]
# Get concrete specs for each matching version of these specs.
version_specs = get_matching_versions(
@ -180,8 +393,7 @@ def create(path, specs, **kwargs):
s.concretize()
# Get the absolute path of the root before we start jumping around.
mirror_root = os.path.abspath(path)
if not os.path.isdir(mirror_root):
if mirror_root and not os.path.isdir(mirror_root):
try:
mkdirp(mirror_root)
except OSError as e:
@ -195,12 +407,12 @@ def create(path, specs, **kwargs):
'error': []
}
mirror_cache = spack.caches.MirrorCache(mirror_root)
mirror_cache = spack.caches.MirrorCache(parsed)
try:
spack.caches.mirror_cache = mirror_cache
# Iterate through packages and download all safe tarballs for each
for spec in version_specs:
add_single_spec(spec, mirror_root, categories, **kwargs)
add_single_spec(spec, parsed, categories, **kwargs)
finally:
spack.caches.mirror_cache = None

View file

@ -0,0 +1,92 @@
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
from io import BufferedReader
import six.moves.urllib.response as urllib_response
import six.moves.urllib.request as urllib_request
import six.moves.urllib.error as urllib_error
import spack.util.s3 as s3_util
import spack.util.url as url_util
import spack.util.web as web_util
# NOTE(opadron): Workaround issue in boto where its StreamingBody
# implementation is missing several APIs expected from IOBase. These missing
# APIs prevent the streams returned by boto from being passed as-is along to
# urllib.
#
# https://github.com/boto/botocore/issues/879
# https://github.com/python/cpython/pull/3249
class WrapStream(BufferedReader):
def __init__(self, raw):
raw.readable = lambda: True
raw.writable = lambda: False
raw.seekable = lambda: False
raw.closed = False
raw.flush = lambda: None
super(WrapStream, self).__init__(raw)
def detach(self):
self.raw = None
def read(self, *args, **kwargs):
return self.raw.read(*args, **kwargs)
def __getattr__(self, key):
return getattr(self.raw, key)
def _s3_open(url):
parsed = url_util.parse(url)
s3 = s3_util.create_s3_session(parsed)
bucket = parsed.netloc
key = parsed.path
if key.startswith('/'):
key = key[1:]
obj = s3.get_object(Bucket=bucket, Key=key)
# NOTE(opadron): Apply workaround here (see above)
stream = WrapStream(obj['Body'])
headers = web_util.standardize_header_names(
obj['ResponseMetadata']['HTTPHeaders'])
return url, headers, stream
class UrllibS3Handler(urllib_request.HTTPSHandler):
def s3_open(self, req):
orig_url = req.get_full_url()
from botocore.exceptions import ClientError
try:
url, headers, stream = _s3_open(orig_url)
return urllib_response.addinfourl(stream, headers, url)
except ClientError as err:
# if no such [KEY], but [KEY]/index.html exists,
# return that, instead.
if err.response['Error']['Code'] == 'NoSuchKey':
try:
_, headers, stream = _s3_open(
url_util.join(orig_url, 'index.html'))
return urllib_response.addinfourl(
stream, headers, orig_url)
except ClientError as err2:
if err.response['Error']['Code'] == 'NoSuchKey':
# raise original error
raise urllib_error.URLError(err)
raise urllib_error.URLError(err2)
raise urllib_error.URLError(err)
S3OpenerDirector = urllib_request.build_opener(UrllibS3Handler())
open = S3OpenerDirector.open

View file

@ -17,7 +17,19 @@
'default': {},
'additionalProperties': False,
'patternProperties': {
r'\w[\w-]*': {'type': 'string'},
r'\w[\w-]*': {
'anyOf': [
{'type': 'string'},
{
'type': 'object',
'required': ['fetch', 'push'],
'properties': {
'fetch': {'type': 'string'},
'push': {'type': 'string'}
}
}
]
},
},
},
}

View file

@ -12,7 +12,6 @@
import getpass
from six import string_types
from six import iteritems
from six.moves.urllib.parse import urljoin
import llnl.util.tty as tty
from llnl.util.filesystem import mkdirp, can_access, install, install_tree
@ -20,12 +19,16 @@
import spack.paths
import spack.caches
import spack.cmd
import spack.config
import spack.error
import spack.mirror
import spack.util.lock
import spack.fetch_strategy as fs
import spack.util.pattern as pattern
import spack.util.path as sup
import spack.util.url as url_util
from spack.util.crypto import prefix_bits, bit_length
@ -252,7 +255,7 @@ def __init__(
# TODO: fetch/stage coupling needs to be reworked -- the logic
# TODO: here is convoluted and not modular enough.
if isinstance(url_or_fetch_strategy, string_types):
self.fetcher = fs.from_url(url_or_fetch_strategy)
self.fetcher = fs.from_url_scheme(url_or_fetch_strategy)
elif isinstance(url_or_fetch_strategy, fs.FetchStrategy):
self.fetcher = url_or_fetch_strategy
else:
@ -397,16 +400,9 @@ def fetch(self, mirror_only=False):
# TODO: CompositeFetchStrategy here.
self.skip_checksum_for_mirror = True
if self.mirror_path:
mirrors = spack.config.get('mirrors')
# Join URLs of mirror roots with mirror paths. Because
# urljoin() will strip everything past the final '/' in
# the root, so we add a '/' if it is not present.
mir_roots = [
sup.substitute_path_variables(root) if root.endswith(os.sep)
else sup.substitute_path_variables(root) + os.sep
for root in mirrors.values()]
urls = [urljoin(root, self.mirror_path) for root in mir_roots]
urls = [
url_util.join(mirror.fetch_url, self.mirror_path)
for mirror in spack.mirror.MirrorCollection().values()]
# If this archive is normally fetched from a tarball URL,
# then use the same digest. `spack mirror` ensures that
@ -425,9 +421,12 @@ def fetch(self, mirror_only=False):
# Add URL strategies for all the mirrors with the digest
for url in urls:
fetchers.insert(
0, fs.URLFetchStrategy(
url, digest, expand=expand, extension=extension))
fetchers.append(fs.from_url_scheme(
url, digest, expand=expand, extension=extension))
# fetchers.insert(
# 0, fs.URLFetchStrategy(
# url, digest, expand=expand, extension=extension))
if self.default_fetcher.cachable:
fetchers.insert(
0, spack.caches.fetch_cache.fetcher(
@ -708,6 +707,91 @@ def purge():
remove_linked_tree(stage_path)
def get_checksums_for_versions(
url_dict, name, first_stage_function=None, keep_stage=False):
"""Fetches and checksums archives from URLs.
This function is called by both ``spack checksum`` and ``spack
create``. The ``first_stage_function`` argument allows the caller to
inspect the first downloaded archive, e.g., to determine the build
system.
Args:
url_dict (dict): A dictionary of the form: version -> URL
name (str): The name of the package
first_stage_function (callable): function that takes a Stage and a URL;
this is run on the stage of the first URL downloaded
keep_stage (bool): whether to keep staging area when command completes
Returns:
(str): A multi-line string containing versions and corresponding hashes
"""
sorted_versions = sorted(url_dict.keys(), reverse=True)
# Find length of longest string in the list for padding
max_len = max(len(str(v)) for v in sorted_versions)
num_ver = len(sorted_versions)
tty.msg("Found {0} version{1} of {2}:".format(
num_ver, '' if num_ver == 1 else 's', name),
"",
*spack.cmd.elide_list(
["{0:{1}} {2}".format(str(v), max_len, url_dict[v])
for v in sorted_versions]))
tty.msg('')
archives_to_fetch = tty.get_number(
"How many would you like to checksum?", default=1, abort='q')
if not archives_to_fetch:
tty.die("Aborted.")
versions = sorted_versions[:archives_to_fetch]
urls = [url_dict[v] for v in versions]
tty.msg("Downloading...")
version_hashes = []
i = 0
for url, version in zip(urls, versions):
try:
with Stage(url, keep=keep_stage) as stage:
# Fetch the archive
stage.fetch()
if i == 0 and first_stage_function:
# Only run first_stage_function the first time,
# no need to run it every time
first_stage_function(stage, url)
# Checksum the archive and add it to the list
version_hashes.append((version, spack.util.crypto.checksum(
hashlib.sha256, stage.archive_file)))
i += 1
except FailedDownloadError:
tty.msg("Failed to fetch {0}".format(url))
except Exception as e:
tty.msg("Something failed on {0}, skipping.".format(url),
" ({0})".format(e))
if not version_hashes:
tty.die("Could not fetch any versions for {0}".format(name))
# Find length of longest string in the list for padding
max_len = max(len(str(v)) for v, h in version_hashes)
# Generate the version directives to put in a package.py
version_lines = "\n".join([
" version('{0}', {1}sha256='{2}')".format(
v, ' ' * (max_len - len(str(v))), h) for v, h in version_hashes
])
num_hash = len(version_hashes)
tty.msg("Checksummed {0} version{1} of {2}".format(
num_hash, '' if num_hash == 1 else 's', name))
return version_lines
class StageError(spack.error.SpackError):
""""Superclass for all errors encountered during staging."""
@ -720,5 +804,9 @@ class RestageError(StageError):
""""Error encountered during restaging."""
class VersionFetchError(StageError):
"""Raised when we can't determine a URL to fetch a package."""
# Keep this in namespace for convenience
FailedDownloadError = fs.FailedDownloadError

View file

@ -53,6 +53,8 @@ def mock_pkg_git_repo(tmpdir_factory):
# initial commit with mock packages
git('add', '.')
git('config', 'user.email', 'testing@spack.io')
git('config', 'user.name', 'Spack Testing')
git('commit', '-m', 'initial mock repo commit')
# add commit with pkg-a, pkg-b, pkg-c packages

View file

@ -595,6 +595,7 @@ def test_bad_config_section(mock_config):
spack.config.get('foobar')
@pytest.mark.skipif(os.getuid() == 0, reason='user is root')
def test_bad_command_line_scopes(tmpdir, mock_config):
cfg = spack.config.Configuration()

View file

@ -546,6 +546,7 @@ def test_write_lock_timeout_with_multiple_readers_3_2_ranges(lock_path):
timeout_write(lock_path, 5, 1))
@pytest.mark.skipif(os.getuid() == 0, reason='user is root')
def test_read_lock_on_read_only_lockfile(lock_dir, lock_path):
"""read-only directory, read-only lockfile."""
touch(lock_path)
@ -573,6 +574,7 @@ def test_read_lock_read_only_dir_writable_lockfile(lock_dir, lock_path):
pass
@pytest.mark.skipif(os.getuid() == 0, reason='user is root')
def test_read_lock_no_lockfile(lock_dir, lock_path):
"""read-only directory, no lockfile (so can't create)."""
with read_only(lock_dir):

View file

@ -653,6 +653,7 @@ def test_source_path_available(self, mock_stage_archive):
assert source_path.endswith(spack.stage._source_path_subdir)
assert not os.path.exists(source_path)
@pytest.mark.skipif(os.getuid() == 0, reason='user is root')
def test_first_accessible_path(self, tmpdir):
"""Test _first_accessible_path names."""
spack_dir = tmpdir.join('paths')
@ -783,6 +784,7 @@ def test_resolve_paths(self):
assert spack.stage._resolve_paths(paths) == res_paths
@pytest.mark.skipif(os.getuid() == 0, reason='user is root')
def test_get_stage_root_bad_path(self, clear_stage_root):
"""Ensure an invalid stage path root raises a StageError."""
with spack.config.override('config:build_stage', '/no/such/path'):

View file

@ -0,0 +1,44 @@
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import os
import six.moves.urllib.parse as urllib_parse
import spack
import spack.util.url as url_util
def create_s3_session(url):
url = url_util.parse(url)
if url.scheme != 's3':
raise ValueError(
'Can not create S3 session from URL with scheme: {SCHEME}'.format(
SCHEME=url.scheme))
# NOTE(opadron): import boto and friends as late as possible. We don't
# want to require boto as a dependency unless the user actually wants to
# access S3 mirrors.
from boto3 import Session
session = Session()
s3_client_args = {"use_ssl": spack.config.get('config:verify_ssl')}
endpoint_url = os.environ.get('S3_ENDPOINT_URL')
if endpoint_url:
if urllib_parse.urlparse(endpoint_url, scheme=None).scheme is None:
endpoint_url = '://'.join(('https', endpoint_url))
s3_client_args['endpoint_url'] = endpoint_url
# if no access credentials provided above, then access anonymously
if not session.get_credentials():
from botocore import UNSIGNED
from botocore.client import Config
s3_client_args["config"] = Config(signature_version=UNSIGNED)
return session.client('s3', **s3_client_args)
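
A minimal usage sketch of the helper above (hypothetical bucket and key), mirroring how the new S3 URL handler and `web_util.url_exists()` call it:

```python
import spack.util.s3 as s3_util

# boto3 is only imported once an s3:// URL is actually used.
s3 = s3_util.create_s3_session('s3://my-bucket/build_cache/index.html')
obj = s3.get_object(Bucket='my-bucket', Key='build_cache/index.html')
print(obj['Body'].read()[:80])
```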

lib/spack/spack/util/url.py (new file, 175 lines)
View file

@ -0,0 +1,175 @@
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""
Utility functions for parsing, formatting, and manipulating URLs.
"""
import itertools
import os.path
from six import string_types
import six.moves.urllib.parse as urllib_parse
import spack.util.path
def _split_all(path):
"""Split path into its atomic components.
Returns the shortest list, L, of strings such that os.path.join(*L) == path
and os.path.split(element) == ('', element) for every element in L except
possibly the first. This first element may possibly have the value of '/',
or some other OS-dependent path root.
"""
result = []
a = path
old_a = None
while a != old_a:
(old_a, (a, b)) = a, os.path.split(a)
if a or b:
result.insert(0, b or '/')
return result
def local_file_path(url):
"""Get a local file path from a url.
If url is a file:// URL, return the absolute path to the local
file or directory referenced by it. Otherwise, return None.
"""
if isinstance(url, string_types):
url = parse(url)
if url.scheme == 'file':
return url.path
return None
def parse(url, scheme='file'):
"""Parse a mirror url.
For file:// URLs, the netloc and path components are concatenated and
passed through spack.util.path.canonicalize_path().
Otherwise, the returned value is the same as urllib's urlparse() with
allow_fragments=False.
"""
url_obj = (
urllib_parse.urlparse(url, scheme=scheme, allow_fragments=False)
if isinstance(url, string_types) else url)
(scheme, netloc, path, params, query, _) = url_obj
scheme = (scheme or 'file').lower()
if scheme == 'file':
path = spack.util.path.canonicalize_path(netloc + path)
while path.startswith('//'):
path = path[1:]
netloc = ''
return urllib_parse.ParseResult(scheme=scheme,
netloc=netloc,
path=path,
params=params,
query=query,
fragment=None)
def format(parsed_url):
"""Format a URL string
Returns a canonicalized format of the given URL as a string.
"""
if isinstance(parsed_url, string_types):
parsed_url = parse(parsed_url)
return parsed_url.geturl()
def join(base_url, path, *extra, **kwargs):
"""Joins a base URL with one or more local URL path components
If resolve_href is True, treat the base URL as though it were the locator
of a web page, and the remaining URL path components as though they formed
a relative URL to be resolved against it (i.e.: as in os.path.join(...)).
The result is an absolute URL to the resource to which a user's browser
would navigate if they clicked on a link with an "href" attribute equal to
the relative URL.
If resolve_href is False (default), then the URL path components are joined
as in os.path.join().
Examples:
base_url = 's3://bucket/index.html'
body = fetch_body(prefix)
link = get_href(body) # link == '../other-bucket/document.txt'
# wrong - link is a local URL that needs to be resolved against base_url
spack.util.url.join(base_url, link)
's3://bucket/other_bucket/document.txt'
# correct - resolve local URL against base_url
spack.util.url.join(base_url, link, resolve_href=True)
's3://other_bucket/document.txt'
prefix = 'https://mirror.spack.io/build_cache'
# wrong - prefix is just a URL prefix
spack.util.url.join(prefix, 'my-package', resolve_href=True)
'https://mirror.spack.io/my-package'
# correct - simply append additional URL path components
spack.util.url.join(prefix, 'my-package', resolve_href=False) # default
'https://mirror.spack.io/build_cache/my-package'
"""
base_url = parse(base_url)
resolve_href = kwargs.get('resolve_href', False)
(scheme, netloc, base_path, params, query, _) = base_url
scheme = scheme.lower()
path_tokens = [
part for part in itertools.chain(
_split_all(path),
itertools.chain.from_iterable(
_split_all(extra_path) for extra_path in extra))
if part and part != '/']
base_path_args = ['/fake-root']
if scheme == 's3':
if netloc:
base_path_args.append(netloc)
if base_path.startswith('/'):
base_path = base_path[1:]
base_path_args.append(base_path)
if resolve_href:
new_base_path, _ = os.path.split(os.path.join(*base_path_args))
base_path_args = [new_base_path]
base_path_args.extend(path_tokens)
base_path = os.path.relpath(os.path.join(*base_path_args), '/fake-root')
if scheme == 's3':
path_tokens = [
part for part in _split_all(base_path)
if part and part != '/']
if path_tokens:
netloc = path_tokens.pop(0)
base_path = os.path.join('', *path_tokens)
return format(urllib_parse.ParseResult(scheme=scheme,
netloc=netloc,
path=base_path,
params=params,
query=query,
fragment=None))

View file

@ -5,16 +5,21 @@
from __future__ import print_function
import codecs
import errno
import re
import os
import os.path
import shutil
import ssl
import sys
import traceback
import hashlib
from itertools import product
import six
from six.moves.urllib.request import urlopen, Request
from six.moves.urllib.error import URLError
from six.moves.urllib.parse import urljoin
import multiprocessing.pool
try:
@ -28,20 +33,47 @@
class HTMLParseError(Exception):
pass
from llnl.util.filesystem import mkdirp
import llnl.util.tty as tty
import spack.config
import spack.cmd
import spack.url
import spack.stage
import spack.config
import spack.error
import spack.url
import spack.util.crypto
import spack.util.s3 as s3_util
import spack.util.url as url_util
from spack.util.compression import ALLOWED_ARCHIVE_TYPES
# Timeout in seconds for web requests
_timeout = 10
# See docstring for standardize_header_names()
_separators = ('', ' ', '_', '-')
HTTP_HEADER_NAME_ALIASES = {
"Accept-ranges": set(
''.join((A, 'ccept', sep, R, 'anges'))
for A, sep, R in product('Aa', _separators, 'Rr')),
"Content-length": set(
''.join((C, 'ontent', sep, L, 'ength'))
for C, sep, L in product('Cc', _separators, 'Ll')),
"Content-type": set(
''.join((C, 'ontent', sep, T, 'ype'))
for C, sep, T in product('Cc', _separators, 'Tt')),
"Date": set(('Date', 'date')),
"Last-modified": set(
''.join((L, 'ast', sep, M, 'odified'))
for L, sep, M in product('Ll', _separators, 'Mm')),
"Server": set(('Server', 'server'))
}
class LinkParser(HTMLParser):
"""This parser just takes an HTML page and strips out the hrefs on the
@ -59,7 +91,7 @@ def handle_starttag(self, tag, attrs):
class NonDaemonProcess(multiprocessing.Process):
"""Process tha allows sub-processes, so pools can have sub-pools."""
"""Process that allows sub-processes, so pools can have sub-pools."""
@property
def daemon(self):
return False
@ -86,25 +118,53 @@ def __init__(self, *args, **kwargs):
super(NonDaemonPool, self).__init__(*args, **kwargs)
def _read_from_url(url, accept_content_type=None):
def uses_ssl(parsed_url):
if parsed_url.scheme == 'https':
return True
if parsed_url.scheme == 's3':
endpoint_url = os.environ.get('S3_ENDPOINT_URL')
if not endpoint_url:
return True
if url_util.parse(endpoint_url, scheme='https').scheme == 'https':
return True
return False
__UNABLE_TO_VERIFY_SSL = (
lambda pyver: (
(pyver < (2, 7, 9)) or
((3,) < pyver < (3, 4, 3))
))(sys.version_info)
def read_from_url(url, accept_content_type=None):
url = url_util.parse(url)
context = None
verify_ssl = spack.config.get('config:verify_ssl')
pyver = sys.version_info
if (pyver < (2, 7, 9) or (3,) < pyver < (3, 4, 3)):
# Don't even bother with a context unless the URL scheme is one that uses
# SSL certs.
if uses_ssl(url):
if verify_ssl:
tty.warn("Spack will not check SSL certificates. You need to "
"update your Python to enable certificate "
"verification.")
elif verify_ssl:
# without a defined context, urlopen will not verify the ssl cert for
# python 3.x
context = ssl.create_default_context()
else:
context = ssl._create_unverified_context()
if __UNABLE_TO_VERIFY_SSL:
# User wants SSL verification, but it cannot be provided.
warn_no_ssl_cert_checking()
else:
# User wants SSL verification, and it *can* be provided.
context = ssl.create_default_context()
else:
# User has explicitly indicated that they do not want SSL
# verification.
context = ssl._create_unverified_context()
req = Request(url)
if accept_content_type:
req = Request(url_util.format(url))
content_type = None
is_web_url = url.scheme in ('http', 'https')
if accept_content_type and is_web_url:
# Make a HEAD request first to check the content type. This lets
# us ignore tarballs and gigantic files.
# It would be nice to do this with the HTTP Accept header to avoid
@ -113,29 +173,179 @@ def _read_from_url(url, accept_content_type=None):
req.get_method = lambda: "HEAD"
resp = _urlopen(req, timeout=_timeout, context=context)
if "Content-type" not in resp.headers:
tty.debug("ignoring page " + url)
return None, None
if not resp.headers["Content-type"].startswith(accept_content_type):
tty.debug("ignoring page " + url + " with content type " +
resp.headers["Content-type"])
return None, None
content_type = resp.headers.get('Content-type')
# Do the real GET request when we know it's just HTML.
req.get_method = lambda: "GET"
response = _urlopen(req, timeout=_timeout, context=context)
response_url = response.geturl()
# Read the page and stick it in the map we'll return
page = response.read().decode('utf-8')
if accept_content_type and not is_web_url:
content_type = response.headers.get('Content-type')
return response_url, page
reject_content_type = (
accept_content_type and (
content_type is None or
not content_type.startswith(accept_content_type)))
if reject_content_type:
tty.debug("ignoring page {0}{1}{2}".format(
url_util.format(url),
" with content type " if content_type is not None else "",
content_type or ""))
return None, None, None
return response.geturl(), response.headers, response
def read_from_url(url, accept_content_type=None):
resp_url, contents = _read_from_url(url, accept_content_type)
return contents
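read_from_url() now returns the resolved URL, the response headers, and the raw response object rather than decoded text; callers decode the stream themselves, as _spider does below with codecs. A minimal usage sketch, assuming a reachable HTML page at a placeholder URL:

# Sketch only; the URL is a placeholder.
import codecs
from spack.util.web import read_from_url

resolved_url, headers, response = read_from_url(
    'https://mirror.example.com/index.html', accept_content_type='text/html')

if response is not None:
    page = codecs.getreader('utf-8')(response).read()
    print(headers.get('Content-type'), len(page))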
def warn_no_ssl_cert_checking():
tty.warn("Spack will not check SSL certificates. You need to update "
"your Python to enable certificate verification.")
def push_to_url(local_path, remote_path, **kwargs):
keep_original = kwargs.get('keep_original', True)
local_url = url_util.parse(local_path)
local_file_path = url_util.local_file_path(local_url)
if local_file_path is None:
raise ValueError('local path must be a file:// url')
remote_url = url_util.parse(remote_path)
verify_ssl = spack.config.get('config:verify_ssl')
if __UNABLE_TO_VERIFY_SSL and verify_ssl and uses_ssl(remote_url):
warn_no_ssl_cert_checking()
remote_file_path = url_util.local_file_path(remote_url)
if remote_file_path is not None:
mkdirp(os.path.dirname(remote_file_path))
if keep_original:
shutil.copy(local_file_path, remote_file_path)
else:
try:
os.rename(local_file_path, remote_file_path)
except OSError as e:
if e.errno == errno.EXDEV:
# NOTE(opadron): The above move failed because it crosses
# filesystem boundaries. Copy the file (plus original
# metadata), and then delete the original. This operation
# needs to be done in separate steps.
shutil.copy2(local_file_path, remote_file_path)
os.remove(local_file_path)
elif remote_url.scheme == 's3':
extra_args = kwargs.get('extra_args', {})
remote_path = remote_url.path
while remote_path.startswith('/'):
remote_path = remote_path[1:]
s3 = s3_util.create_s3_session(remote_url)
s3.upload_file(local_file_path, remote_url.netloc,
remote_path, ExtraArgs=extra_args)
if not keep_original:
os.remove(local_file_path)
else:
raise NotImplementedError(
'Unrecognized URL scheme: {SCHEME}'.format(
SCHEME=remote_url.scheme))
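push_to_url() accepts both filesystem and S3 destinations, and keep_original=False turns the copy into a move. A hedged usage sketch; the paths, bucket, and mirror layout below are placeholders:

# Sketch only; paths, bucket, and mirror layout are placeholders.
from spack.util.web import push_to_url

# Copy into a mirror laid out on the local filesystem.
push_to_url('/tmp/pkg-1.0.tar.gz', 'file:///srv/mirror/pkg-1.0.tar.gz')

# Upload to an S3-backed mirror, removing the local copy afterwards.
push_to_url('/tmp/pkg-1.0.tar.gz',
            's3://my-mirror-bucket/build_cache/pkg-1.0.tar.gz',
            keep_original=False)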
def url_exists(url):
url = url_util.parse(url)
local_path = url_util.local_file_path(url)
if local_path:
return os.path.exists(local_path)
if url.scheme == 's3':
s3 = s3_util.create_s3_session(url)
from botocore.exceptions import ClientError
try:
s3.get_object(Bucket=url.netloc, Key=url.path)
return True
except ClientError as err:
if err.response['Error']['Code'] == 'NoSuchKey':
return False
raise err
# otherwise, just try to "read" from the URL, and assume that *any*
# non-throwing response contains the resource represented by the URL
try:
read_from_url(url)
return True
except URLError:
return False
def remove_url(url):
url = url_util.parse(url)
local_path = url_util.local_file_path(url)
if local_path:
os.remove(local_path)
return
if url.scheme == 's3':
s3 = s3_util.create_s3_session(url)
s3.delete_object(Bucket=url.s3_bucket, Key=url.path)
return
# Don't even try for other URL schemes.
def _list_s3_objects(client, url, num_entries, start_after=None):
list_args = dict(
Bucket=url.netloc,
Prefix=url.path,
MaxKeys=num_entries)
if start_after is not None:
list_args['StartAfter'] = start_after
result = client.list_objects_v2(**list_args)
last_key = None
if result['IsTruncated']:
last_key = result['Contents'][-1]['Key']
iter = (key for key in
(
os.path.relpath(entry['Key'], url.path)
for entry in result['Contents']
)
if key != '.')
return iter, last_key
def _iter_s3_prefix(client, url, num_entries=1024):
key = None
while True:
contents, key = _list_s3_objects(
client, url, num_entries, start_after=key)
for x in contents:
yield x
if not key:
break
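_iter_s3_prefix() pages through a bucket listing by passing the last key of each truncated response as StartAfter for the next call. The same pagination pattern, sketched standalone with boto3 (bucket and prefix are placeholders):

# Standalone sketch of the pagination pattern; not part of the diff.
import boto3

def iter_keys(bucket, prefix, page_size=1024):
    client = boto3.client('s3')
    start_after = None
    while True:
        kwargs = dict(Bucket=bucket, Prefix=prefix, MaxKeys=page_size)
        if start_after is not None:
            kwargs['StartAfter'] = start_after
        result = client.list_objects_v2(**kwargs)
        for entry in result.get('Contents', []):
            yield entry['Key']
        if not result.get('IsTruncated'):
            break
        start_after = result['Contents'][-1]['Key']

for key in iter_keys('my-mirror-bucket', 'build_cache/'):
    print(key)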
def list_url(url):
url = url_util.parse(url)
local_path = url_util.local_file_path(url)
if local_path:
return os.listdir(local_path)
if url.scheme == 's3':
s3 = s3_util.create_s3_session(url)
return list(set(
key.split('/', 1)[0]
for key in _iter_s3_prefix(s3, url)))
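Together, url_exists(), list_url(), and remove_url() give mirror code a uniform, directory-like view over local paths and S3 prefixes. A hedged sketch of the intended calling convention; the bucket and object names are placeholders:

# Sketch only; bucket and object names are placeholders.
from spack.util.web import url_exists, list_url, remove_url

mirror = 's3://my-mirror-bucket/build_cache'

if url_exists(mirror + '/index.html'):
    print(list_url(mirror))                  # top-level entries under the prefix
    remove_url(mirror + '/stale.spec.yaml')  # delete a single object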
def _spider(url, visited, root, depth, max_depth, raise_on_error):
@@ -154,16 +364,12 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error):
pages = {} # dict from page URL -> text content.
links = set() # set of all links seen on visited pages.
# root may end with index.html -- chop that off.
if root.endswith('/index.html'):
root = re.sub('/index.html$', '', root)
try:
response_url, page = _read_from_url(url, 'text/html')
if not response_url or not page:
response_url, _, response = read_from_url(url, 'text/html')
if not response_url or not response:
return pages, links
page = codecs.getreader('utf-8')(response).read()
pages[response_url] = page
# Parse out the links in the page
@@ -173,8 +379,10 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error):
while link_parser.links:
raw_link = link_parser.links.pop()
abs_link = urljoin(response_url, raw_link.strip())
abs_link = url_util.join(
response_url,
raw_link.strip(),
resolve_href=True)
links.add(abs_link)
# Skip stuff that looks like an archive
@@ -243,16 +451,28 @@ def _spider_wrapper(args):
return _spider(*args)
def _urlopen(*args, **kwargs):
def _urlopen(req, *args, **kwargs):
"""Wrapper for compatibility with old versions of Python."""
# We don't pass 'context' parameter to urlopen because it
# was introduces only starting versions 2.7.9 and 3.4.3 of Python.
if 'context' in kwargs and kwargs['context'] is None:
url = req
try:
url = url.get_full_url()
except AttributeError:
pass
# We don't pass 'context' parameter because it was only introduced starting
# with versions 2.7.9 and 3.4.3 of Python.
if 'context' in kwargs:
del kwargs['context']
return urlopen(*args, **kwargs)
opener = urlopen
if url_util.parse(url).scheme == 's3':
import spack.s3_handler
opener = spack.s3_handler.open
return opener(req, *args, **kwargs)
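_urlopen() now dispatches on the URL scheme, routing s3:// requests through spack.s3_handler.open and everything else through urllib. A hedged illustration, assuming the vendored six is importable and that the handler returns a file-like response; the bucket is a placeholder:

# Sketch only; the bucket is a placeholder.
from six.moves.urllib.request import Request
from spack.util.web import _urlopen

response = _urlopen(Request('s3://my-mirror-bucket/build_cache/index.html'))
print(response.read()[:80])  # assumes a urllib-style, file-like response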
def spider(root_url, depth=0):
def spider(root, depth=0):
"""Gets web pages from a root URL.
If depth is specified (e.g., depth=2), then this will also follow
@@ -262,7 +482,9 @@ def spider(root_url, depth=0):
performance over a sequential fetch.
"""
pages, links = _spider(root_url, set(), root_url, 0, depth, False)
root = url_util.parse(root)
pages, links = _spider(root, set(), root, 0, depth, False)
return pages, links
@@ -356,99 +578,112 @@ def find_versions_of_archive(archive_urls, list_url=None, list_depth=0):
return versions
def get_checksums_for_versions(
url_dict, name, first_stage_function=None, keep_stage=False):
"""Fetches and checksums archives from URLs.
def standardize_header_names(headers):
"""Replace certain header names with standardized spellings.
This function is called by both ``spack checksum`` and ``spack
create``. The ``first_stage_function`` argument allows the caller to
inspect the first downloaded archive, e.g., to determine the build
system.
Standardizes the spellings of the following header names:
- Accept-ranges
- Content-length
- Content-type
- Date
- Last-modified
- Server
Args:
url_dict (dict): A dictionary of the form: version -> URL
name (str): The name of the package
first_stage_function (callable): function that takes a Stage and a URL;
this is run on the stage of the first URL downloaded
keep_stage (bool): whether to keep staging area when command completes
Every name considered is translated to one of the above names if the only
difference between the two is how the first letters of each word are
capitalized; whether words are separated; or, if separated, whether they
are so by a dash (-), underscore (_), or space ( ). Header names that
cannot be mapped as described above are returned unaltered.
Returns:
(str): A multi-line string containing versions and corresponding hashes
For example: The standard spelling of "Content-length" would be substituted
for any of the following names:
- Content-length
- content_length
- contentlength
- content_Length
- contentLength
- content Length
... and any other header name, such as "Content-encoding", would not be
altered, regardless of spelling.
If headers is a string, then it (or an appropriate substitute) is returned.
If headers is a non-empty tuple, headers[0] is a string, and there exists a
standardized spelling for headers[0] that differs from it, then a new tuple
is returned. This tuple has the same elements as headers, except the first
element is the standardized spelling for headers[0].
If headers is a sequence, then a new list is considered, where each element
is its corresponding element in headers, but mapped as above if a string or
tuple. This new list is returned if at least one of its elements differs
from its corresponding element in headers.
If headers is a mapping, then a new dict is considered, where the key in
each item is the key of its corresponding item in headers, mapped as above
if a string or tuple. The value is taken from the corresponding item. If
the keys of multiple items in headers map to the same key after being
standardized, then the value for the resulting item is undefined. The new
dict is returned if at least one of its items has a key that differs from
that of their corresponding item in headers, or if the keys of multiple
items in headers map to the same key after being standardized.
In all other cases headers is returned unaltered.
"""
sorted_versions = sorted(url_dict.keys(), reverse=True)
if isinstance(headers, six.string_types):
for standardized_spelling, other_spellings in (
HTTP_HEADER_NAME_ALIASES.items()):
if headers in other_spellings:
if headers == standardized_spelling:
return headers
return standardized_spelling
return headers
# Find length of longest string in the list for padding
max_len = max(len(str(v)) for v in sorted_versions)
num_ver = len(sorted_versions)
if isinstance(headers, tuple):
if not headers:
return headers
old = headers[0]
if isinstance(old, six.string_types):
new = standardize_header_names(old)
if old is not new:
return (new,) + headers[1:]
return headers
tty.msg("Found {0} version{1} of {2}:".format(
num_ver, '' if num_ver == 1 else 's', name),
"",
*spack.cmd.elide_list(
["{0:{1}} {2}".format(str(v), max_len, url_dict[v])
for v in sorted_versions]))
print()
try:
changed = False
new_dict = {}
for key, value in headers.items():
if isinstance(key, (tuple, six.string_types)):
old_key, key = key, standardize_header_names(key)
changed = changed or key is not old_key
archives_to_fetch = tty.get_number(
"How many would you like to checksum?", default=1, abort='q')
new_dict[key] = value
if not archives_to_fetch:
tty.die("Aborted.")
return new_dict if changed else headers
except (AttributeError, TypeError, ValueError):
pass
versions = sorted_versions[:archives_to_fetch]
urls = [url_dict[v] for v in versions]
try:
changed = False
new_list = []
for item in headers:
if isinstance(item, (tuple, six.string_types)):
old_item, item = item, standardize_header_names(item)
changed = changed or item is not old_item
tty.msg("Downloading...")
version_hashes = []
i = 0
for url, version in zip(urls, versions):
try:
with spack.stage.Stage(url, keep=keep_stage) as stage:
# Fetch the archive
stage.fetch()
if i == 0 and first_stage_function:
# Only run first_stage_function the first time,
# no need to run it every time
first_stage_function(stage, url)
new_list.append(item)
# Checksum the archive and add it to the list
version_hashes.append((version, spack.util.crypto.checksum(
hashlib.sha256, stage.archive_file)))
i += 1
except spack.stage.FailedDownloadError:
tty.msg("Failed to fetch {0}".format(url))
except Exception as e:
tty.msg("Something failed on {0}, skipping.".format(url),
" ({0})".format(e))
return new_list if changed else headers
except TypeError:
pass
if not version_hashes:
tty.die("Could not fetch any versions for {0}".format(name))
# Find length of longest string in the list for padding
max_len = max(len(str(v)) for v, h in version_hashes)
# Generate the version directives to put in a package.py
version_lines = "\n".join([
" version('{0}', {1}sha256='{2}')".format(
v, ' ' * (max_len - len(str(v))), h) for v, h in version_hashes
])
num_hash = len(version_hashes)
tty.msg("Checksummed {0} version{1} of {2}".format(
num_hash, '' if num_hash == 1 else 's', name))
return version_lines
return headers
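The dispatch above covers strings, tuples, mappings, and other sequences. A few hedged doctest-style checks of the mapping the docstring describes, assuming Spack's modules are importable:

# Illustration only.
from spack.util.web import standardize_header_names

assert standardize_header_names('content_length') == 'Content-length'
assert standardize_header_names('Content-encoding') == 'Content-encoding'  # unknown names pass through
assert standardize_header_names(('contentType', 'text/html')) == ('Content-type', 'text/html')
assert standardize_header_names({'accept ranges': 'bytes'}) == {'Accept-ranges': 'bytes'}
assert standardize_header_names(['last_Modified', 'Server']) == ['Last-modified', 'Server']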
class SpackWebError(spack.error.SpackError):
"""Superclass for Spack web spidering errors."""
class VersionFetchError(SpackWebError):
"""Raised when we can't determine a URL to fetch a package."""
class NoNetworkConnectionError(SpackWebError):
"""Raised when an operation can't get an internet connection."""
def __init__(self, message, url):