fetching: S3 upload and download (#11117)

This extends Spack so that it can fetch sources and binaries from, push sources and binaries to, and index the contents of mirrors hosted in an S3 bucket.

High-level to-do list:

- [x] Extend mirrors configuration to add support for `file://` and `s3://` URLs.
- [x] Ensure all fetching, pushing, and indexing operations work for `file://` URLs.
- [x] Implement S3 source fetching
- [x] Implement S3 binary mirror indexing
- [x] Implement S3 binary package fetching
- [x] Implement S3 source pushing
- [x] Implement S3 binary package pushing

Important details:

* Refactor URL handling so that S3 URLs and mirror URLs are handled more gracefully.
  - Updated `parse()` to accept already-parsed URL objects; an equivalent object
    is returned with any extra S3-related attributes intact. Objects created with
    urllib can also be passed, and the additional S3 handling logic is still applied
    (see the sketch below).

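A minimal sketch of the updated `parse()` behavior (the bucket name is hypothetical):

```python
import six.moves.urllib.parse as urllib_parse
import spack.util.url as url_util

# Strings and already-parsed URL objects are both accepted.
parsed = url_util.parse('s3://my-bucket/build_cache')
same = url_util.parse(parsed)  # an equivalent object is returned

# Objects produced directly by urllib can be passed too; the extra
# S3-related handling is still applied.
via_urllib = url_util.parse(urllib_parse.urlparse('s3://my-bucket/build_cache'))
```
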
* Update the mirror schema/parsing so that a mirror can have separate fetch/push URLs (see the sketch below).
* Implement `S3FetchStrategy` and several supporting utility changes.
* Provide more feature-complete S3 fetching.
* Update the `spack buildcache create` command to support S3.

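A minimal sketch of the two mirror forms the updated schema accepts, using `Mirror.from_dict()` from this change (the names and URLs are hypothetical):

```python
import spack.mirror

# Single-URL form: fetching and pushing use the same location.
simple = spack.mirror.Mirror.from_dict('s3://my-bucket/spack', name='cloud')

# Two-URL form: fetch from a read-only HTTP front end, push to S3.
split = spack.mirror.Mirror.from_dict(
    {'fetch': 'https://mirror.example.com/spack',
     'push': 's3://my-bucket/spack'},
    name='cloud')

assert simple.fetch_url == simple.push_url
assert split.fetch_url != split.push_url
```
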
* Move the core logic for reading data from S3 out of the S3 fetch strategy and into
  the S3 URL handler. The S3 fetch strategy now calls into `read_from_url()`. Since
  `read_from_url()` can now handle S3 URLs, the S3 fetch strategy is arguably redundant;
  it is not yet clear whether the ideal design is to keep S3 fetching in a fetch
  strategy, implement it directly in `read_from_url()`, or both (see the sketch below).

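A minimal sketch of reading an S3 object through `read_from_url()` after this change (bucket and key are hypothetical):

```python
import codecs

import spack.util.web as web_util

# read_from_url() now returns (url, headers, stream) and understands s3:// URLs.
_, headers, stream = web_util.read_from_url('s3://my-bucket/build_cache/index.html')
contents = codecs.getreader('utf-8')(stream).read()
print(headers.get('Content-type'), len(contents))
```
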
* Expanded what can be passed to `spack buildcache` via the `-d` flag: in addition
  to a directory on the local filesystem, the name of a configured mirror or a push
  URL can be passed directly (see the sketch below).
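
A minimal sketch of how the `-d` argument is resolved to a push URL via `MirrorCollection.lookup()` (the mirror name and URL are hypothetical):

```python
import spack.mirror
import spack.util.url as url_util

mirrors = spack.mirror.MirrorCollection()

# A configured mirror name resolves to that mirror's push URL...
outdir = url_util.format(mirrors.lookup('my-mirror').push_url)

# ...while anything else is treated as an anonymous mirror URL and used as-is.
outdir = url_util.format(mirrors.lookup('s3://my-bucket/spack').push_url)
```
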
Omar Padron 2019-10-22 03:32:04 -04:00 committed by Todd Gamblin
parent 6cb972a9d2
commit fd58c98b0e
21 changed files with 1411 additions and 280 deletions

View file

@ -1,3 +1,8 @@
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
generate ci jobs:
script:
- "./bin/generate-gitlab-ci-yml.sh"

View file

@ -10,4 +10,4 @@ set -x
SPACK_BIN_DIR="${CI_PROJECT_DIR}/bin"
export PATH="${SPACK_BIN_DIR}:${PATH}"
spack upload-s3 index
spack buildcache update-index -d "$MIRROR_URL"

View file

@ -3,6 +3,7 @@
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import codecs
import os
import re
import tarfile
@ -23,14 +24,32 @@
import spack.util.gpg as gpg_util
import spack.relocate as relocate
import spack.util.spack_yaml as syaml
import spack.mirror
import spack.util.url as url_util
import spack.util.web as web_util
from spack.spec import Spec
from spack.stage import Stage
from spack.util.gpg import Gpg
from spack.util.web import spider, read_from_url
from spack.util.executable import ProcessError
_build_cache_relative_path = 'build_cache'
BUILD_CACHE_INDEX_TEMPLATE = '''
<html>
<head>
<title>{title}</title>
</head>
<body>
<ul>
{path_list}
</ul>
</body>
</html>
'''
BUILD_CACHE_INDEX_ENTRY_TEMPLATE = ' <li><a href="{path}">{path}</a></li>'
class NoOverwriteException(Exception):
"""
@ -101,7 +120,7 @@ def build_cache_relative_path():
return _build_cache_relative_path
def build_cache_directory(prefix):
def build_cache_prefix(prefix):
return os.path.join(prefix, build_cache_relative_path())
@ -246,29 +265,36 @@ def sign_tarball(key, force, specfile_path):
Gpg.sign(key, specfile_path, '%s.asc' % specfile_path)
def _generate_html_index(path_list, output_path):
f = open(output_path, 'w')
header = """<html>\n
<head>\n</head>\n
<list>\n"""
footer = "</list>\n</html>\n"
f.write(header)
for path in path_list:
rel = os.path.basename(path)
f.write('<li><a href="%s"> %s</a>\n' % (rel, rel))
f.write(footer)
f.close()
def generate_package_index(cache_prefix):
"""Create the build cache index page.
Creates (or replaces) the "index.html" page at the location given in
cache_prefix. This page contains a link for each binary package (*.yaml)
and signing key (*.key) under cache_prefix.
"""
tmpdir = tempfile.mkdtemp()
try:
index_html_path = os.path.join(tmpdir, 'index.html')
file_list = (
entry
for entry in web_util.list_url(cache_prefix)
if (entry.endswith('.yaml')
or entry.endswith('.key')))
def generate_package_index(build_cache_dir):
yaml_list = os.listdir(build_cache_dir)
path_list = [os.path.join(build_cache_dir, l) for l in yaml_list]
with open(index_html_path, 'w') as f:
f.write(BUILD_CACHE_INDEX_TEMPLATE.format(
title='Spack Package Index',
path_list='\n'.join(
BUILD_CACHE_INDEX_ENTRY_TEMPLATE.format(path=path)
for path in file_list)))
index_html_path_tmp = os.path.join(build_cache_dir, 'index.html.tmp')
index_html_path = os.path.join(build_cache_dir, 'index.html')
_generate_html_index(path_list, index_html_path_tmp)
shutil.move(index_html_path_tmp, index_html_path)
web_util.push_to_url(
index_html_path,
url_util.join(cache_prefix, 'index.html'),
keep_original=False,
extra_args={'ContentType': 'text/html'})
finally:
shutil.rmtree(tmpdir)
def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
@ -281,33 +307,41 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
raise ValueError('spec must be concrete to build tarball')
# set up some paths
build_cache_dir = build_cache_directory(outdir)
tmpdir = tempfile.mkdtemp()
cache_prefix = build_cache_prefix(tmpdir)
tarfile_name = tarball_name(spec, '.tar.gz')
tarfile_dir = os.path.join(build_cache_dir,
tarball_directory_name(spec))
tarfile_dir = os.path.join(cache_prefix, tarball_directory_name(spec))
tarfile_path = os.path.join(tarfile_dir, tarfile_name)
mkdirp(tarfile_dir)
spackfile_path = os.path.join(
build_cache_dir, tarball_path_name(spec, '.spack'))
if os.path.exists(spackfile_path):
cache_prefix, tarball_path_name(spec, '.spack'))
remote_spackfile_path = url_util.join(
outdir, os.path.relpath(spackfile_path, tmpdir))
mkdirp(tarfile_dir)
if web_util.url_exists(remote_spackfile_path):
if force:
os.remove(spackfile_path)
web_util.remove_url(remote_spackfile_path)
else:
raise NoOverwriteException(str(spackfile_path))
raise NoOverwriteException(url_util.format(remote_spackfile_path))
# need to copy the spec file so the build cache can be downloaded
# without concretizing with the current spack packages
# and preferences
spec_file = os.path.join(spec.prefix, ".spack", "spec.yaml")
specfile_name = tarball_name(spec, '.spec.yaml')
specfile_path = os.path.realpath(
os.path.join(build_cache_dir, specfile_name))
os.path.join(cache_prefix, specfile_name))
if os.path.exists(specfile_path):
remote_specfile_path = url_util.join(
outdir, os.path.relpath(specfile_path, os.path.realpath(tmpdir)))
if web_util.url_exists(remote_specfile_path):
if force:
os.remove(specfile_path)
web_util.remove_url(remote_specfile_path)
else:
raise NoOverwriteException(str(specfile_path))
raise NoOverwriteException(url_util.format(remote_specfile_path))
# make a copy of the install directory to work with
workdir = os.path.join(tempfile.mkdtemp(), os.path.basename(spec.prefix))
@ -324,6 +358,7 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
except Exception as e:
shutil.rmtree(workdir)
shutil.rmtree(tarfile_dir)
shutil.rmtree(tmpdir)
tty.die(e)
else:
try:
@ -331,7 +366,9 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
except Exception as e:
shutil.rmtree(workdir)
shutil.rmtree(tarfile_dir)
shutil.rmtree(tmpdir)
tty.die(e)
# create compressed tarball of the install prefix
with closing(tarfile.open(tarfile_path, 'w:gz')) as tar:
tar.add(name='%s' % workdir,
@ -360,7 +397,9 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
spec_dict['full_hash'] = spec.full_hash()
tty.debug('The full_hash ({0}) of {1} will be written into {2}'.format(
spec_dict['full_hash'], spec.name, specfile_path))
spec_dict['full_hash'],
spec.name,
url_util.format(remote_specfile_path)))
tty.debug(spec.tree())
with open(specfile_path, 'w') as outfile:
@ -382,9 +421,19 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
if not unsigned:
os.remove('%s.asc' % specfile_path)
# create an index.html for the build_cache directory so specs can be found
if regenerate_index:
generate_package_index(build_cache_dir)
web_util.push_to_url(
spackfile_path, remote_spackfile_path, keep_original=False)
web_util.push_to_url(
specfile_path, remote_specfile_path, keep_original=False)
try:
# create an index.html for the build_cache directory so specs can be
# found
if regenerate_index:
generate_package_index(url_util.join(
outdir, os.path.relpath(cache_prefix, tmpdir)))
finally:
shutil.rmtree(tmpdir)
return None
@ -394,13 +443,16 @@ def download_tarball(spec):
Download binary tarball for given package into stage area
Return True if successful
"""
mirrors = spack.config.get('mirrors')
if len(mirrors) == 0:
if not spack.mirror.MirrorCollection():
tty.die("Please add a spack mirror to allow " +
"download of pre-compiled packages.")
tarball = tarball_path_name(spec, '.spack')
for mirror_name, mirror_url in mirrors.items():
url = mirror_url + '/' + _build_cache_relative_path + '/' + tarball
for mirror in spack.mirror.MirrorCollection().values():
url = url_util.join(
mirror.fetch_url, _build_cache_relative_path, tarball)
# stage the tarball into standard place
stage = Stage(url, name="build_cache", keep=True)
try:
@ -408,6 +460,7 @@ def download_tarball(spec):
return stage.save_filename
except fs.FetchError:
continue
return None
@ -610,26 +663,29 @@ def get_specs(force=False):
tty.debug("Using previously-retrieved specs")
return _cached_specs
mirrors = spack.config.get('mirrors')
if len(mirrors) == 0:
tty.debug("No Spack mirrors are currently configured")
if not spack.mirror.MirrorCollection():
tty.warn("No Spack mirrors are currently configured")
return {}
urls = set()
for mirror_name, mirror_url in mirrors.items():
if mirror_url.startswith('file'):
mirror = mirror_url.replace(
'file://', '') + "/" + _build_cache_relative_path
tty.msg("Finding buildcaches in %s" % mirror)
if os.path.exists(mirror):
files = os.listdir(mirror)
for mirror in spack.mirror.MirrorCollection().values():
fetch_url_build_cache = url_util.join(
mirror.fetch_url, _build_cache_relative_path)
mirror_dir = url_util.local_file_path(fetch_url_build_cache)
if mirror_dir:
tty.msg("Finding buildcaches in %s" % mirror_dir)
if os.path.exists(mirror_dir):
files = os.listdir(mirror_dir)
for file in files:
if re.search('spec.yaml', file):
link = 'file://' + mirror + '/' + file
link = url_util.join(fetch_url_build_cache, file)
urls.add(link)
else:
tty.msg("Finding buildcaches on %s" % mirror_url)
p, links = spider(mirror_url + "/" + _build_cache_relative_path)
tty.msg("Finding buildcaches at %s" %
url_util.format(fetch_url_build_cache))
p, links = web_util.spider(
url_util.join(fetch_url_build_cache, 'index.html'))
for link in links:
if re.search("spec.yaml", link):
urls.add(link)
@ -659,28 +715,33 @@ def get_keys(install=False, trust=False, force=False):
"""
Get pgp public keys available on mirror
"""
mirrors = spack.config.get('mirrors')
if len(mirrors) == 0:
if not spack.mirror.MirrorCollection():
tty.die("Please add a spack mirror to allow " +
"download of build caches.")
keys = set()
for mirror_name, mirror_url in mirrors.items():
if mirror_url.startswith('file'):
mirror = os.path.join(
mirror_url.replace('file://', ''), _build_cache_relative_path)
tty.msg("Finding public keys in %s" % mirror)
files = os.listdir(mirror)
for mirror in spack.mirror.MirrorCollection().values():
fetch_url_build_cache = url_util.join(
mirror.fetch_url, _build_cache_relative_path)
mirror_dir = url_util.local_file_path(fetch_url_build_cache)
if mirror_dir:
tty.msg("Finding public keys in %s" % mirror_dir)
files = os.listdir(mirror_dir)
for file in files:
if re.search(r'\.key', file):
link = 'file://' + mirror + '/' + file
link = url_util.join(fetch_url_build_cache, file)
keys.add(link)
else:
tty.msg("Finding public keys on %s" % mirror_url)
p, links = spider(mirror_url + "/build_cache", depth=1)
tty.msg("Finding public keys at %s" %
url_util.format(fetch_url_build_cache))
p, links = web_util.spider(fetch_url_build_cache, depth=1)
for link in links:
if re.search(r'\.key', link):
keys.add(link)
for link in keys:
with Stage(link, name="build_cache", keep=True) as stage:
if os.path.exists(stage.save_filename) and force:
@ -717,15 +778,16 @@ def needs_rebuild(spec, mirror_url, rebuild_on_errors=False):
# Try to retrieve the .spec.yaml directly, based on the known
# format of the name, in order to determine if the package
# needs to be rebuilt.
build_cache_dir = build_cache_directory(mirror_url)
cache_prefix = build_cache_prefix(mirror_url)
spec_yaml_file_name = tarball_name(spec, '.spec.yaml')
file_path = os.path.join(build_cache_dir, spec_yaml_file_name)
file_path = os.path.join(cache_prefix, spec_yaml_file_name)
result_of_error = 'Package ({0}) will {1}be rebuilt'.format(
spec.short_spec, '' if rebuild_on_errors else 'not ')
try:
yaml_contents = read_from_url(file_path)
_, _, yaml_file = web_util.read_from_url(file_path)
yaml_contents = codecs.getreader('utf-8')(yaml_file).read()
except URLError as url_err:
err_msg = [
'Unable to determine whether {0} needs rebuilding,',
@ -782,22 +844,22 @@ def check_specs_against_mirrors(mirrors, specs, output_file=None,
"""
rebuilds = {}
for mirror_name, mirror_url in mirrors.items():
tty.msg('Checking for built specs at %s' % mirror_url)
for mirror in spack.mirror.MirrorCollection(mirrors).values():
tty.msg('Checking for built specs at %s' % mirror.fetch_url)
rebuild_list = []
for spec in specs:
if needs_rebuild(spec, mirror_url, rebuild_on_errors):
if needs_rebuild(spec, mirror.fetch_url, rebuild_on_errors):
rebuild_list.append({
'short_spec': spec.short_spec,
'hash': spec.dag_hash()
})
if rebuild_list:
rebuilds[mirror_url] = {
'mirrorName': mirror_name,
'mirrorUrl': mirror_url,
rebuilds[mirror.fetch_url] = {
'mirrorName': mirror.name,
'mirrorUrl': mirror.fetch_url,
'rebuildSpecs': rebuild_list
}
@ -810,33 +872,36 @@ def check_specs_against_mirrors(mirrors, specs, output_file=None,
def _download_buildcache_entry(mirror_root, descriptions):
for description in descriptions:
url = os.path.join(mirror_root, description['url'])
description_url = os.path.join(mirror_root, description['url'])
path = description['path']
fail_if_missing = description['required']
mkdirp(path)
stage = Stage(url, name="build_cache", path=path, keep=True)
stage = Stage(
description_url, name="build_cache", path=path, keep=True)
try:
stage.fetch()
except fs.FetchError as e:
tty.debug(e)
if fail_if_missing:
tty.error('Failed to download required url {0}'.format(url))
tty.error('Failed to download required url {0}'.format(
description_url))
return False
return True
def download_buildcache_entry(file_descriptions):
mirrors = spack.config.get('mirrors')
if len(mirrors) == 0:
if not spack.mirror.MirrorCollection():
tty.die("Please add a spack mirror to allow " +
"download of buildcache entries.")
for mirror_name, mirror_url in mirrors.items():
mirror_root = os.path.join(mirror_url, _build_cache_relative_path)
for mirror in spack.mirror.MirrorCollection().values():
mirror_root = os.path.join(
mirror.fetch_url,
_build_cache_relative_path)
if _download_buildcache_entry(mirror_root, file_descriptions):
return True

View file

@ -9,11 +9,13 @@
import llnl.util.lang
from llnl.util.filesystem import mkdirp
import spack.error
import spack.paths
import spack.config
import spack.fetch_strategy
import spack.util.file_cache
from spack.util.path import canonicalize_path
import spack.util.path
import spack.util.url as url_util
def _misc_cache():
@ -25,7 +27,7 @@ def _misc_cache():
path = spack.config.get('config:misc_cache')
if not path:
path = os.path.join(spack.paths.user_config_path, 'cache')
path = canonicalize_path(path)
path = spack.util.path.canonicalize_path(path)
return spack.util.file_cache.FileCache(path)
@ -43,22 +45,26 @@ def _fetch_cache():
path = spack.config.get('config:source_cache')
if not path:
path = os.path.join(spack.paths.var_path, "cache")
path = canonicalize_path(path)
path = spack.util.path.canonicalize_path(path)
return spack.fetch_strategy.FsCache(path)
class MirrorCache(object):
def __init__(self, root):
self.root = os.path.abspath(root)
self.root = url_util.local_file_path(root)
if not self.root:
raise spack.error.SpackError(
'MirrorCaches only work with file:// URLs')
self.new_resources = set()
self.existing_resources = set()
def store(self, fetcher, relative_dest):
# Note this will archive package sources even if they would not
# normally be cached (e.g. the current tip of an hg/git branch)
dst = os.path.join(self.root, relative_dest)
if os.path.exists(dst):
self.existing_resources.add(relative_dest)
else:

View file

@ -14,6 +14,7 @@
import spack.cmd.common.arguments as arguments
import spack.environment as ev
import spack.hash_types as ht
import spack.mirror
import spack.relocate
import spack.repo
import spack.spec
@ -21,6 +22,8 @@
import spack.config
import spack.repo
import spack.store
import spack.util.url as url_util
from spack.error import SpecError
from spack.spec import Spec, save_dependency_spec_yamls
@ -205,6 +208,13 @@ def setup_parser(subparser):
help='Destination mirror url')
copy.set_defaults(func=buildcache_copy)
# Update buildcache index without copying any additional packages
update_index = subparsers.add_parser(
'update-index', help=buildcache_update_index.__doc__)
update_index.add_argument(
'-d', '--mirror-url', default=None, help='Destination mirror url')
update_index.set_defaults(func=buildcache_update_index)
def find_matching_specs(pkgs, allow_multiple_matches=False, env=None):
"""Returns a list of specs matching the not necessarily
@ -312,9 +322,14 @@ def createtarball(args):
" yaml file containing a spec to install")
pkgs = set(packages)
specs = set()
outdir = '.'
if args.directory:
outdir = args.directory
mirror = spack.mirror.MirrorCollection().lookup(outdir)
outdir = url_util.format(mirror.push_url)
signkey = None
if args.key:
signkey = args.key
@ -649,6 +664,19 @@ def buildcache_copy(args):
shutil.copyfile(cdashid_src_path, cdashid_dest_path)
def buildcache_update_index(args):
"""Update a buildcache index."""
outdir = '.'
if args.mirror_url:
outdir = args.mirror_url
mirror = spack.mirror.MirrorCollection().lookup(outdir)
outdir = url_util.format(mirror.push_url)
bindist.generate_package_index(
url_util.join(outdir, bindist.build_cache_relative_path()))
def buildcache(parser, args):
if args.func:
args.func(args)

View file

@ -11,8 +11,8 @@
import spack.cmd
import spack.repo
import spack.stage
import spack.util.crypto
import spack.util.web
from spack.util.naming import valid_fully_qualified_module_name
from spack.version import ver, Version
@ -56,7 +56,7 @@ def checksum(parser, args):
if not url_dict:
tty.die("Could not find any versions for {0}".format(pkg.name))
version_lines = spack.util.web.get_checksums_for_versions(
version_lines = spack.stage.get_checksums_for_versions(
url_dict, pkg.name, keep_stage=args.keep_stage)
print()

View file

@ -13,6 +13,7 @@
import spack.util.web
import spack.repo
import spack.stage
from spack.spec import Spec
from spack.util.editor import editor
from spack.util.executable import which, ProcessError
@ -618,7 +619,7 @@ def get_versions(args, name):
version = parse_version(args.url)
url_dict = {version: args.url}
versions = spack.util.web.get_checksums_for_versions(
versions = spack.stage.get_checksums_for_versions(
url_dict, name, first_stage_function=guesser,
keep_stage=args.keep_stage)
else:

View file

@ -4,20 +4,21 @@
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import sys
import os
from datetime import datetime
import argparse
import llnl.util.tty as tty
from llnl.util.tty.colify import colify
import spack.cmd
import spack.cmd.common.arguments as arguments
import spack.concretize
import spack.config
import spack.environment as ev
import spack.mirror
import spack.repo
import spack.cmd.common.arguments as arguments
import spack.environment as ev
import spack.util.url as url_util
import spack.util.web as web_util
from spack.spec import Spec
from spack.error import SpackError
from spack.util.spack_yaml import syaml_dict
@ -73,6 +74,19 @@ def setup_parser(subparser):
default=spack.config.default_modify_scope(),
help="configuration scope to modify")
# Set-Url
set_url_parser = sp.add_parser('set-url', help=mirror_set_url.__doc__)
set_url_parser.add_argument('name', help="mnemonic name for mirror")
set_url_parser.add_argument(
'url', help="url of mirror directory from 'spack mirror create'")
set_url_parser.add_argument(
'--push', action='store_true',
help="set only the URL used for uploading new packages")
set_url_parser.add_argument(
'--scope', choices=scopes, metavar=scopes_metavar,
default=spack.config.default_modify_scope(),
help="configuration scope to modify")
# List
list_parser = sp.add_parser('list', help=mirror_list.__doc__)
list_parser.add_argument(
@ -83,20 +97,14 @@ def setup_parser(subparser):
def mirror_add(args):
"""Add a mirror to Spack."""
url = args.url
if url.startswith('/'):
url = 'file://' + url
url = url_util.format(args.url)
mirrors = spack.config.get('mirrors', scope=args.scope)
if not mirrors:
mirrors = syaml_dict()
for name, u in mirrors.items():
if name == args.name:
tty.die("Mirror with name %s already exists." % name)
if u == url:
tty.die("Mirror with url %s already exists." % url)
# should only be one item per mirror dict.
if args.name in mirrors:
tty.die("Mirror with name %s already exists." % args.name)
items = [(n, u) for n, u in mirrors.items()]
items.insert(0, (args.name, url))
@ -117,21 +125,86 @@ def mirror_remove(args):
old_value = mirrors.pop(name)
spack.config.set('mirrors', mirrors, scope=args.scope)
tty.msg("Removed mirror %s with url %s" % (name, old_value))
debug_msg_url = "url %s"
debug_msg = ["Removed mirror %s with"]
values = [name]
try:
fetch_value = old_value['fetch']
push_value = old_value['push']
debug_msg.extend(("fetch", debug_msg_url, "and push", debug_msg_url))
values.extend((fetch_value, push_value))
except TypeError:
debug_msg.append(debug_msg_url)
values.append(old_value)
tty.debug(" ".join(debug_msg) % tuple(values))
tty.msg("Removed mirror %s." % name)
def mirror_set_url(args):
"""Change the URL of a mirror."""
url = url_util.format(args.url)
mirrors = spack.config.get('mirrors', scope=args.scope)
if not mirrors:
mirrors = syaml_dict()
if args.name not in mirrors:
tty.die("No mirror found with name %s." % args.name)
entry = mirrors[args.name]
try:
fetch_url = entry['fetch']
push_url = entry['push']
except TypeError:
fetch_url, push_url = entry, entry
changes_made = False
if args.push:
changes_made = changes_made or push_url != url
push_url = url
else:
changes_made = (
changes_made or fetch_url != push_url or push_url != url)
fetch_url, push_url = url, url
items = [
(
(n, u)
if n != args.name else (
(n, {"fetch": fetch_url, "push": push_url})
if fetch_url != push_url else (n, fetch_url)
)
)
for n, u in mirrors.items()
]
mirrors = syaml_dict(items)
spack.config.set('mirrors', mirrors, scope=args.scope)
if changes_made:
tty.msg(
"Changed%s url for mirror %s." %
((" (push)" if args.push else ""), args.name))
else:
tty.msg("Url already set for mirror %s." % args.name)
def mirror_list(args):
"""Print out available mirrors to the console."""
mirrors = spack.config.get('mirrors', scope=args.scope)
mirrors = spack.mirror.MirrorCollection(scope=args.scope)
if not mirrors:
tty.msg("No mirrors configured.")
return
max_len = max(len(n) for n in mirrors.keys())
fmt = "%%-%ds%%s" % (max_len + 4)
for name in mirrors:
print(fmt % (name, mirrors[name]))
mirrors.display()
def _read_specs_from_file(filename):
@ -188,14 +261,13 @@ def mirror_create(args):
msg = 'Skipping {0} as it is an external spec.'
tty.msg(msg.format(spec.cshort_spec))
# Default name for directory is spack-mirror-<DATESTAMP>
directory = args.directory
if not directory:
timestamp = datetime.now().strftime("%Y-%m-%d")
directory = 'spack-mirror-' + timestamp
mirror = spack.mirror.Mirror(
args.directory or spack.config.get('config:source_cache'))
directory = url_util.format(mirror.push_url)
# Make sure nothing is in the way.
existed = os.path.isdir(directory)
existed = web_util.url_exists(directory)
# Actually do the work to create the mirror
present, mirrored, error = spack.mirror.create(
@ -220,6 +292,7 @@ def mirror(parser, args):
'add': mirror_add,
'remove': mirror_remove,
'rm': mirror_remove,
'set-url': mirror_set_url,
'list': mirror_list}
if args.no_checksum:

View file

@ -5,10 +5,8 @@
from __future__ import division, print_function
from collections import defaultdict
try:
from urllib.parse import urlparse
except ImportError:
from urlparse import urlparse
import six.moves.urllib.parse as urllib_parse
import spack.fetch_strategy as fs
import spack.repo
@ -262,7 +260,7 @@ def add(self, fetcher):
self.checksums[algo] += 1
# parse out the URL scheme (https/http/ftp/etc.)
urlinfo = urlparse(fetcher.url)
urlinfo = urllib_parse.urlparse(fetcher.url)
self.schemes[urlinfo.scheme] += 1
elif url_type == 'git':

View file

@ -23,6 +23,7 @@
Archive a source directory, e.g. for creating a mirror.
"""
import os
import os.path
import sys
import re
import shutil
@ -30,6 +31,7 @@
import xml.etree.ElementTree
from functools import wraps
from six import string_types, with_metaclass
import six.moves.urllib.parse as urllib_parse
import llnl.util.tty as tty
from llnl.util.filesystem import (
@ -39,6 +41,9 @@
import spack.error
import spack.util.crypto as crypto
import spack.util.pattern as pattern
import spack.util.web as web_util
import spack.util.url as url_util
from spack.util.executable import which
from spack.util.string import comma_and, quote
from spack.version import Version, ver
@ -48,6 +53,17 @@
#: List of all fetch strategies, created by FetchStrategy metaclass.
all_strategies = []
CONTENT_TYPE_MISMATCH_WARNING_TEMPLATE = (
"The contents of {subject} look like {content_type}. Either the URL"
" you are trying to use does not exist or you have an internet gateway"
" issue. You can remove the bad archive using 'spack clean"
" <package>', then try again using the correct URL.")
def warn_content_type_mismatch(subject, content_type='HTML'):
tty.warn(CONTENT_TYPE_MISMATCH_WARNING_TEMPLATE.format(
subject=subject, content_type=content_type))
def _needs_stage(fun):
"""Many methods on fetch strategies require a stage to be set
@ -351,12 +367,7 @@ def fetch(self):
content_types = re.findall(r'Content-Type:[^\r\n]+', headers,
flags=re.IGNORECASE)
if content_types and 'text/html' in content_types[-1]:
msg = ("The contents of {0} look like HTML. Either the URL "
"you are trying to use does not exist or you have an "
"internet gateway issue. You can remove the bad archive "
"using 'spack clean <package>', then try again using "
"the correct URL.")
tty.warn(msg.format(self.archive_file or "the archive"))
warn_content_type_mismatch(self.archive_file or "the archive")
if save_file:
os.rename(partial_file, save_file)
@ -449,7 +460,10 @@ def archive(self, destination):
if not self.archive_file:
raise NoArchiveFileError("Cannot call archive() before fetching.")
shutil.copyfile(self.archive_file, destination)
web_util.push_to_url(
self.archive_file,
destination,
keep_original=True)
@_needs_stage
def check(self):
@ -1063,6 +1077,54 @@ def __str__(self):
return "[hg] %s" % self.url
class S3FetchStrategy(URLFetchStrategy):
"""FetchStrategy that pulls from an S3 bucket."""
enabled = True
url_attr = 's3'
def __init__(self, *args, **kwargs):
try:
super(S3FetchStrategy, self).__init__(*args, **kwargs)
except ValueError:
if not kwargs.get('url'):
raise ValueError(
"S3FetchStrategy requires a url for fetching.")
@_needs_stage
def fetch(self):
if self.archive_file:
tty.msg("Already downloaded %s" % self.archive_file)
return
parsed_url = url_util.parse(self.url)
if parsed_url.scheme != 's3':
raise ValueError(
'S3FetchStrategy can only fetch from s3:// urls.')
tty.msg("Fetching %s" % self.url)
basename = os.path.basename(parsed_url.path)
with working_dir(self.stage.path):
_, headers, stream = web_util.read_from_url(self.url)
with open(basename, 'wb') as f:
shutil.copyfileobj(stream, f)
content_type = headers['Content-type']
if content_type == 'text/html':
warn_content_type_mismatch(self.archive_file or "the archive")
if self.stage.save_filename:
os.rename(
os.path.join(self.stage.path, basename),
self.stage.save_filename)
if not self.archive_file:
raise FailedDownloadError(self.url)
def from_url(url):
"""Given a URL, find an appropriate fetch strategy for it.
Currently just gives you a URLFetchStrategy that uses curl.
@ -1206,6 +1268,34 @@ def for_package_version(pkg, version):
raise InvalidArgsError(pkg, version, **args)
def from_url_scheme(url, *args, **kwargs):
"""Finds a suitable FetchStrategy by matching its url_attr with the scheme
in the given url."""
url = kwargs.get('url', url)
parsed_url = urllib_parse.urlparse(url, scheme='file')
scheme_mapping = (
kwargs.get('scheme_mapping') or
{
'file': 'url',
'http': 'url',
'https': 'url'
})
scheme = parsed_url.scheme
scheme = scheme_mapping.get(scheme, scheme)
for fetcher in all_strategies:
url_attr = getattr(fetcher, 'url_attr', None)
if url_attr and url_attr == scheme:
return fetcher(url, *args, **kwargs)
raise ValueError(
'No FetchStrategy found for url with scheme: "{SCHEME}"'.format(
SCHEME=parsed_url.scheme))
def from_list_url(pkg):
"""If a package provides a URL which lists URLs for resources by
version, this can create a fetcher for a URL discovered for

View file

@ -13,6 +13,18 @@
"""
import sys
import os
import os.path
import operator
import six
import ruamel.yaml.error as yaml_error
try:
from collections.abc import Mapping
except ImportError:
from collections import Mapping
import llnl.util.tty as tty
from llnl.util.filesystem import mkdirp
@ -20,9 +32,205 @@
import spack.error
import spack.url as url
import spack.fetch_strategy as fs
from spack.spec import Spec
import spack.util.spack_json as sjson
import spack.util.spack_yaml as syaml
import spack.util.url as url_util
import spack.spec
from spack.version import VersionList
from spack.util.compression import allowed_archive
from spack.util.spack_yaml import syaml_dict
def _display_mirror_entry(size, name, url, type_=None):
if type_:
type_ = "".join((" (", type_, ")"))
else:
type_ = ""
print("%-*s%s%s" % (size + 4, name, url, type_))
class Mirror(object):
"""Represents a named location for storing source tarballs and binary
packages.
Mirrors have a fetch_url that indicates where and how artifacts are fetched
from them, and a push_url that indicates where and how artifacts are pushed
to them. These two URLs are usually the same.
"""
def __init__(self, fetch_url, push_url=None, name=None):
self._fetch_url = fetch_url
self._push_url = push_url
self._name = name
def to_json(self, stream=None):
return sjson.dump(self.to_dict(), stream)
def to_yaml(self, stream=None):
return syaml.dump(self.to_dict(), stream)
@staticmethod
def from_yaml(stream, name=None):
try:
data = syaml.load(stream)
return Mirror.from_dict(data, name)
except yaml_error.MarkedYAMLError as e:
raise syaml.SpackYAMLError("error parsing YAML spec:", str(e))
@staticmethod
def from_json(stream, name=None):
d = sjson.load(stream)
return Mirror.from_dict(d, name)
def to_dict(self):
if self._push_url is None:
return self._fetch_url
else:
return syaml_dict([
('fetch', self._fetch_url),
('push', self._push_url)])
@staticmethod
def from_dict(d, name=None):
if isinstance(d, six.string_types):
return Mirror(d, name=name)
else:
return Mirror(d['fetch'], d['push'], name)
def display(self, max_len=0):
if self._push_url is None:
_display_mirror_entry(max_len, self._name, self._fetch_url)
else:
_display_mirror_entry(
max_len, self._name, self._fetch_url, "fetch")
_display_mirror_entry(
max_len, self._name, self._push_url, "push")
def __str__(self):
name = self._name
if name is None:
name = ''
else:
name = ' "%s"' % name
if self._push_url is None:
return "[Mirror%s (%s)]" % (name, self._fetch_url)
return "[Mirror%s (fetch: %s, push: %s)]" % (
name, self._fetch_url, self._push_url)
def __repr__(self):
return ''.join((
'Mirror(',
', '.join(
'%s=%s' % (k, repr(v))
for k, v in (
('fetch_url', self._fetch_url),
('push_url', self._push_url),
('name', self._name))
if k == 'fetch_url' or v),
')'
))
@property
def name(self):
return self._name or "<unnamed>"
@property
def fetch_url(self):
return self._fetch_url
@fetch_url.setter
def fetch_url(self, url):
self._fetch_url = url
self._normalize()
@property
def push_url(self):
if self._push_url is None:
return self._fetch_url
return self._push_url
@push_url.setter
def push_url(self, url):
self._push_url = url
self._normalize()
def _normalize(self):
if self._push_url is not None and self._push_url == self._fetch_url:
self._push_url = None
class MirrorCollection(Mapping):
"""A mapping of mirror names to mirrors."""
def __init__(self, mirrors=None, scope=None):
self._mirrors = dict(
(name, Mirror.from_dict(mirror, name))
for name, mirror in (
mirrors.items() if mirrors is not None else
spack.config.get('mirrors', scope=scope).items()))
def to_json(self, stream=None):
return sjson.dump(self.to_dict(True), stream)
def to_yaml(self, stream=None):
return syaml.dump(self.to_dict(True), stream)
@staticmethod
def from_yaml(stream, name=None):
try:
data = syaml.load(stream)
return MirrorCollection(data)
except yaml_error.MarkedYAMLError as e:
raise syaml.SpackYAMLError("error parsing YAML spec:", str(e))
@staticmethod
def from_json(stream, name=None):
d = sjson.load(stream)
return MirrorCollection(d)
def to_dict(self, recursive=False):
return syaml_dict(sorted(
(
(k, (v.to_dict() if recursive else v))
for (k, v) in self._mirrors.items()
), key=operator.itemgetter(0)
))
@staticmethod
def from_dict(d):
return MirrorCollection(d)
def __getitem__(self, item):
return self._mirrors[item]
def display(self):
max_len = max(len(mirror.name) for mirror in self._mirrors.values())
for mirror in self._mirrors.values():
mirror.display(max_len)
def lookup(self, name_or_url):
"""Looks up and returns a Mirror.
If this MirrorCollection contains a named Mirror under the name
[name_or_url], then that mirror is returned. Otherwise, [name_or_url]
is assumed to be a mirror URL, and an anonymous mirror with the given
URL is returned.
"""
result = self.get(name_or_url)
if result is None:
result = Mirror(fetch_url=name_or_url)
return result
def __iter__(self):
return iter(self._mirrors)
def __len__(self):
return len(self._mirrors)
def mirror_archive_filename(spec, fetcher, resource_id=None):
@ -114,7 +322,7 @@ def get_matching_versions(specs, **kwargs):
# Generate only versions that satisfy the spec.
if spec.concrete or v.satisfies(spec.versions):
s = Spec(pkg.name)
s = spack.spec.Spec(pkg.name)
s.versions = VersionList([v])
s.variants = spec.variants.copy()
# This is needed to avoid hanging references during the
@ -166,12 +374,17 @@ def create(path, specs, **kwargs):
it creates specs for those versions. If the version satisfies any spec
in the specs list, it is downloaded and added to the mirror.
"""
parsed = url_util.parse(path)
mirror_root = url_util.local_file_path(parsed)
# Make sure nothing is in the way.
if os.path.isfile(path):
raise MirrorError("%s already exists and is a file." % path)
if mirror_root and os.path.isfile(mirror_root):
raise MirrorError("%s already exists and is a file." % mirror_root)
# automatically spec-ify anything in the specs array.
specs = [s if isinstance(s, Spec) else Spec(s) for s in specs]
specs = [
s if isinstance(s, spack.spec.Spec) else spack.spec.Spec(s)
for s in specs]
# Get concrete specs for each matching version of these specs.
version_specs = get_matching_versions(
@ -180,8 +393,7 @@ def create(path, specs, **kwargs):
s.concretize()
# Get the absolute path of the root before we start jumping around.
mirror_root = os.path.abspath(path)
if not os.path.isdir(mirror_root):
if mirror_root and not os.path.isdir(mirror_root):
try:
mkdirp(mirror_root)
except OSError as e:
@ -195,12 +407,12 @@ def create(path, specs, **kwargs):
'error': []
}
mirror_cache = spack.caches.MirrorCache(mirror_root)
mirror_cache = spack.caches.MirrorCache(parsed)
try:
spack.caches.mirror_cache = mirror_cache
# Iterate through packages and download all safe tarballs for each
for spec in version_specs:
add_single_spec(spec, mirror_root, categories, **kwargs)
add_single_spec(spec, parsed, categories, **kwargs)
finally:
spack.caches.mirror_cache = None

View file

@ -0,0 +1,92 @@
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
from io import BufferedReader
import six.moves.urllib.response as urllib_response
import six.moves.urllib.request as urllib_request
import six.moves.urllib.error as urllib_error
import spack.util.s3 as s3_util
import spack.util.url as url_util
import spack.util.web as web_util
# NOTE(opadron): Workaround issue in boto where its StreamingBody
# implementation is missing several APIs expected from IOBase. These missing
# APIs prevent the streams returned by boto from being passed as-is along to
# urllib.
#
# https://github.com/boto/botocore/issues/879
# https://github.com/python/cpython/pull/3249
class WrapStream(BufferedReader):
def __init__(self, raw):
raw.readable = lambda: True
raw.writable = lambda: False
raw.seekable = lambda: False
raw.closed = False
raw.flush = lambda: None
super(WrapStream, self).__init__(raw)
def detach(self):
self.raw = None
def read(self, *args, **kwargs):
return self.raw.read(*args, **kwargs)
def __getattr__(self, key):
return getattr(self.raw, key)
def _s3_open(url):
parsed = url_util.parse(url)
s3 = s3_util.create_s3_session(parsed)
bucket = parsed.netloc
key = parsed.path
if key.startswith('/'):
key = key[1:]
obj = s3.get_object(Bucket=bucket, Key=key)
# NOTE(opadron): Apply workaround here (see above)
stream = WrapStream(obj['Body'])
headers = web_util.standardize_header_names(
obj['ResponseMetadata']['HTTPHeaders'])
return url, headers, stream
class UrllibS3Handler(urllib_request.HTTPSHandler):
def s3_open(self, req):
orig_url = req.get_full_url()
from botocore.exceptions import ClientError
try:
url, headers, stream = _s3_open(orig_url)
return urllib_response.addinfourl(stream, headers, url)
except ClientError as err:
# if no such [KEY], but [KEY]/index.html exists,
# return that, instead.
if err.response['Error']['Code'] == 'NoSuchKey':
try:
_, headers, stream = _s3_open(
url_util.join(orig_url, 'index.html'))
return urllib_response.addinfourl(
stream, headers, orig_url)
except ClientError as err2:
if err.response['Error']['Code'] == 'NoSuchKey':
# raise original error
raise urllib_error.URLError(err)
raise urllib_error.URLError(err2)
raise urllib_error.URLError(err)
S3OpenerDirector = urllib_request.build_opener(UrllibS3Handler())
open = S3OpenerDirector.open

View file

@ -17,7 +17,19 @@
'default': {},
'additionalProperties': False,
'patternProperties': {
r'\w[\w-]*': {'type': 'string'},
r'\w[\w-]*': {
'anyOf': [
{'type': 'string'},
{
'type': 'object',
'required': ['fetch', 'push'],
'properties': {
'fetch': {'type': 'string'},
'push': {'type': 'string'}
}
}
]
},
},
},
}

View file

@ -12,7 +12,6 @@
import getpass
from six import string_types
from six import iteritems
from six.moves.urllib.parse import urljoin
import llnl.util.tty as tty
from llnl.util.filesystem import mkdirp, can_access, install, install_tree
@ -20,12 +19,16 @@
import spack.paths
import spack.caches
import spack.cmd
import spack.config
import spack.error
import spack.mirror
import spack.util.lock
import spack.fetch_strategy as fs
import spack.util.pattern as pattern
import spack.util.path as sup
import spack.util.url as url_util
from spack.util.crypto import prefix_bits, bit_length
@ -252,7 +255,7 @@ def __init__(
# TODO: fetch/stage coupling needs to be reworked -- the logic
# TODO: here is convoluted and not modular enough.
if isinstance(url_or_fetch_strategy, string_types):
self.fetcher = fs.from_url(url_or_fetch_strategy)
self.fetcher = fs.from_url_scheme(url_or_fetch_strategy)
elif isinstance(url_or_fetch_strategy, fs.FetchStrategy):
self.fetcher = url_or_fetch_strategy
else:
@ -397,16 +400,9 @@ def fetch(self, mirror_only=False):
# TODO: CompositeFetchStrategy here.
self.skip_checksum_for_mirror = True
if self.mirror_path:
mirrors = spack.config.get('mirrors')
# Join URLs of mirror roots with mirror paths. Because
# urljoin() will strip everything past the final '/' in
# the root, so we add a '/' if it is not present.
mir_roots = [
sup.substitute_path_variables(root) if root.endswith(os.sep)
else sup.substitute_path_variables(root) + os.sep
for root in mirrors.values()]
urls = [urljoin(root, self.mirror_path) for root in mir_roots]
urls = [
url_util.join(mirror.fetch_url, self.mirror_path)
for mirror in spack.mirror.MirrorCollection().values()]
# If this archive is normally fetched from a tarball URL,
# then use the same digest. `spack mirror` ensures that
@ -425,9 +421,12 @@ def fetch(self, mirror_only=False):
# Add URL strategies for all the mirrors with the digest
for url in urls:
fetchers.insert(
0, fs.URLFetchStrategy(
url, digest, expand=expand, extension=extension))
fetchers.append(fs.from_url_scheme(
url, digest, expand=expand, extension=extension))
# fetchers.insert(
# 0, fs.URLFetchStrategy(
# url, digest, expand=expand, extension=extension))
if self.default_fetcher.cachable:
fetchers.insert(
0, spack.caches.fetch_cache.fetcher(
@ -708,6 +707,91 @@ def purge():
remove_linked_tree(stage_path)
def get_checksums_for_versions(
url_dict, name, first_stage_function=None, keep_stage=False):
"""Fetches and checksums archives from URLs.
This function is called by both ``spack checksum`` and ``spack
create``. The ``first_stage_function`` argument allows the caller to
inspect the first downloaded archive, e.g., to determine the build
system.
Args:
url_dict (dict): A dictionary of the form: version -> URL
name (str): The name of the package
first_stage_function (callable): function that takes a Stage and a URL;
this is run on the stage of the first URL downloaded
keep_stage (bool): whether to keep staging area when command completes
Returns:
(str): A multi-line string containing versions and corresponding hashes
"""
sorted_versions = sorted(url_dict.keys(), reverse=True)
# Find length of longest string in the list for padding
max_len = max(len(str(v)) for v in sorted_versions)
num_ver = len(sorted_versions)
tty.msg("Found {0} version{1} of {2}:".format(
num_ver, '' if num_ver == 1 else 's', name),
"",
*spack.cmd.elide_list(
["{0:{1}} {2}".format(str(v), max_len, url_dict[v])
for v in sorted_versions]))
tty.msg('')
archives_to_fetch = tty.get_number(
"How many would you like to checksum?", default=1, abort='q')
if not archives_to_fetch:
tty.die("Aborted.")
versions = sorted_versions[:archives_to_fetch]
urls = [url_dict[v] for v in versions]
tty.msg("Downloading...")
version_hashes = []
i = 0
for url, version in zip(urls, versions):
try:
with Stage(url, keep=keep_stage) as stage:
# Fetch the archive
stage.fetch()
if i == 0 and first_stage_function:
# Only run first_stage_function the first time,
# no need to run it every time
first_stage_function(stage, url)
# Checksum the archive and add it to the list
version_hashes.append((version, spack.util.crypto.checksum(
hashlib.sha256, stage.archive_file)))
i += 1
except FailedDownloadError:
tty.msg("Failed to fetch {0}".format(url))
except Exception as e:
tty.msg("Something failed on {0}, skipping.".format(url),
" ({0})".format(e))
if not version_hashes:
tty.die("Could not fetch any versions for {0}".format(name))
# Find length of longest string in the list for padding
max_len = max(len(str(v)) for v, h in version_hashes)
# Generate the version directives to put in a package.py
version_lines = "\n".join([
" version('{0}', {1}sha256='{2}')".format(
v, ' ' * (max_len - len(str(v))), h) for v, h in version_hashes
])
num_hash = len(version_hashes)
tty.msg("Checksummed {0} version{1} of {2}".format(
num_hash, '' if num_hash == 1 else 's', name))
return version_lines
class StageError(spack.error.SpackError):
""""Superclass for all errors encountered during staging."""
@ -720,5 +804,9 @@ class RestageError(StageError):
""""Error encountered during restaging."""
class VersionFetchError(StageError):
"""Raised when we can't determine a URL to fetch a package."""
# Keep this in namespace for convenience
FailedDownloadError = fs.FailedDownloadError

View file

@ -53,6 +53,8 @@ def mock_pkg_git_repo(tmpdir_factory):
# initial commit with mock packages
git('add', '.')
git('config', 'user.email', 'testing@spack.io')
git('config', 'user.name', 'Spack Testing')
git('commit', '-m', 'initial mock repo commit')
# add commit with pkg-a, pkg-b, pkg-c packages

View file

@ -595,6 +595,7 @@ def test_bad_config_section(mock_config):
spack.config.get('foobar')
@pytest.mark.skipif(os.getuid() == 0, reason='user is root')
def test_bad_command_line_scopes(tmpdir, mock_config):
cfg = spack.config.Configuration()

View file

@ -546,6 +546,7 @@ def test_write_lock_timeout_with_multiple_readers_3_2_ranges(lock_path):
timeout_write(lock_path, 5, 1))
@pytest.mark.skipif(os.getuid() == 0, reason='user is root')
def test_read_lock_on_read_only_lockfile(lock_dir, lock_path):
"""read-only directory, read-only lockfile."""
touch(lock_path)
@ -573,6 +574,7 @@ def test_read_lock_read_only_dir_writable_lockfile(lock_dir, lock_path):
pass
@pytest.mark.skipif(os.getuid() == 0, reason='user is root')
def test_read_lock_no_lockfile(lock_dir, lock_path):
"""read-only directory, no lockfile (so can't create)."""
with read_only(lock_dir):

View file

@ -653,6 +653,7 @@ def test_source_path_available(self, mock_stage_archive):
assert source_path.endswith(spack.stage._source_path_subdir)
assert not os.path.exists(source_path)
@pytest.mark.skipif(os.getuid() == 0, reason='user is root')
def test_first_accessible_path(self, tmpdir):
"""Test _first_accessible_path names."""
spack_dir = tmpdir.join('paths')
@ -783,6 +784,7 @@ def test_resolve_paths(self):
assert spack.stage._resolve_paths(paths) == res_paths
@pytest.mark.skipif(os.getuid() == 0, reason='user is root')
def test_get_stage_root_bad_path(self, clear_stage_root):
"""Ensure an invalid stage path root raises a StageError."""
with spack.config.override('config:build_stage', '/no/such/path'):

View file

@ -0,0 +1,44 @@
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import os
import six.moves.urllib.parse as urllib_parse
import spack
import spack.util.url as url_util
def create_s3_session(url):
url = url_util.parse(url)
if url.scheme != 's3':
raise ValueError(
'Can not create S3 session from URL with scheme: {SCHEME}'.format(
SCHEME=url.scheme))
# NOTE(opadron): import boto and friends as late as possible. We don't
# want to require boto as a dependency unless the user actually wants to
# access S3 mirrors.
from boto3 import Session
session = Session()
s3_client_args = {"use_ssl": spack.config.get('config:verify_ssl')}
endpoint_url = os.environ.get('S3_ENDPOINT_URL')
if endpoint_url:
if urllib_parse.urlparse(endpoint_url, scheme=None).scheme is None:
endpoint_url = '://'.join(('https', endpoint_url))
s3_client_args['endpoint_url'] = endpoint_url
# if no access credentials provided above, then access anonymously
if not session.get_credentials():
from botocore import UNSIGNED
from botocore.client import Config
s3_client_args["config"] = Config(signature_version=UNSIGNED)
return session.client('s3', **s3_client_args)
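
A minimal usage sketch of the helper above (hypothetical bucket and key), mirroring how the new S3 URL handler and `web_util.url_exists()` call it:

```python
import spack.util.s3 as s3_util

# boto3 is only imported once an s3:// URL is actually used.
s3 = s3_util.create_s3_session('s3://my-bucket/build_cache/index.html')
obj = s3.get_object(Bucket='my-bucket', Key='build_cache/index.html')
print(obj['Body'].read()[:80])
```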

lib/spack/spack/util/url.py (new file, 175 lines)
View file

@ -0,0 +1,175 @@
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""
Utility functions for parsing, formatting, and manipulating URLs.
"""
import itertools
import os.path
from six import string_types
import six.moves.urllib.parse as urllib_parse
import spack.util.path
def _split_all(path):
"""Split path into its atomic components.
Returns the shortest list, L, of strings such that os.path.join(*L) == path
and os.path.split(element) == ('', element) for every element in L except
possibly the first. This first element may possibly have the value of '/',
or some other OS-dependent path root.
"""
result = []
a = path
old_a = None
while a != old_a:
(old_a, (a, b)) = a, os.path.split(a)
if a or b:
result.insert(0, b or '/')
return result
def local_file_path(url):
"""Get a local file path from a url.
If url is a file:// URL, return the absolute path to the local
file or directory referenced by it. Otherwise, return None.
"""
if isinstance(url, string_types):
url = parse(url)
if url.scheme == 'file':
return url.path
return None
def parse(url, scheme='file'):
"""Parse a mirror url.
For file:// URLs, the netloc and path components are concatenated and
passed through spack.util.path.canonicalize_path().
Otherwise, the returned value is the same as urllib's urlparse() with
allow_fragments=False.
"""
url_obj = (
urllib_parse.urlparse(url, scheme=scheme, allow_fragments=False)
if isinstance(url, string_types) else url)
(scheme, netloc, path, params, query, _) = url_obj
scheme = (scheme or 'file').lower()
if scheme == 'file':
path = spack.util.path.canonicalize_path(netloc + path)
while path.startswith('//'):
path = path[1:]
netloc = ''
return urllib_parse.ParseResult(scheme=scheme,
netloc=netloc,
path=path,
params=params,
query=query,
fragment=None)
def format(parsed_url):
"""Format a URL string
Returns a canonicalized format of the given URL as a string.
"""
if isinstance(parsed_url, string_types):
parsed_url = parse(parsed_url)
return parsed_url.geturl()
def join(base_url, path, *extra, **kwargs):
"""Joins a base URL with one or more local URL path components
If resolve_href is True, treat the base URL as though it were the locator
of a web page, and the remaining URL path components as though they formed
a relative URL to be resolved against it (i.e.: as in os.path.join(...)).
The result is an absolute URL to the resource to which a user's browser
would navigate if they clicked on a link with an "href" attribute equal to
the relative URL.
If resolve_href is False (default), then the URL path components are joined
as in os.path.join().
Examples:
base_url = 's3://bucket/index.html'
body = fetch_body(prefix)
link = get_href(body) # link == '../other-bucket/document.txt'
# wrong - link is a local URL that needs to be resolved against base_url
spack.util.url.join(base_url, link)
's3://bucket/other_bucket/document.txt'
# correct - resolve local URL against base_url
spack.util.url.join(base_url, link, resolve_href=True)
's3://other_bucket/document.txt'
prefix = 'https://mirror.spack.io/build_cache'
# wrong - prefix is just a URL prefix
spack.util.url.join(prefix, 'my-package', resolve_href=True)
'https://mirror.spack.io/my-package'
# correct - simply append additional URL path components
spack.util.url.join(prefix, 'my-package', resolve_href=False) # default
'https://mirror.spack.io/build_cache/my-package'
"""
base_url = parse(base_url)
resolve_href = kwargs.get('resolve_href', False)
(scheme, netloc, base_path, params, query, _) = base_url
scheme = scheme.lower()
path_tokens = [
part for part in itertools.chain(
_split_all(path),
itertools.chain.from_iterable(
_split_all(extra_path) for extra_path in extra))
if part and part != '/']
base_path_args = ['/fake-root']
if scheme == 's3':
if netloc:
base_path_args.append(netloc)
if base_path.startswith('/'):
base_path = base_path[1:]
base_path_args.append(base_path)
if resolve_href:
new_base_path, _ = os.path.split(os.path.join(*base_path_args))
base_path_args = [new_base_path]
base_path_args.extend(path_tokens)
base_path = os.path.relpath(os.path.join(*base_path_args), '/fake-root')
if scheme == 's3':
path_tokens = [
part for part in _split_all(base_path)
if part and part != '/']
if path_tokens:
netloc = path_tokens.pop(0)
base_path = os.path.join('', *path_tokens)
return format(urllib_parse.ParseResult(scheme=scheme,
netloc=netloc,
path=base_path,
params=params,
query=query,
fragment=None))

View file

@ -5,16 +5,21 @@
from __future__ import print_function
import codecs
import errno
import re
import os
import os.path
import shutil
import ssl
import sys
import traceback
import hashlib
from itertools import product
import six
from six.moves.urllib.request import urlopen, Request
from six.moves.urllib.error import URLError
from six.moves.urllib.parse import urljoin
import multiprocessing.pool
try:
@ -28,20 +33,47 @@
class HTMLParseError(Exception):
pass
from llnl.util.filesystem import mkdirp
import llnl.util.tty as tty
import spack.config
import spack.cmd
import spack.url
import spack.stage
import spack.config
import spack.error
import spack.url
import spack.util.crypto
import spack.util.s3 as s3_util
import spack.util.url as url_util
from spack.util.compression import ALLOWED_ARCHIVE_TYPES
# Timeout in seconds for web requests
_timeout = 10
# See docstring for standardize_header_names()
_separators = ('', ' ', '_', '-')
HTTP_HEADER_NAME_ALIASES = {
"Accept-ranges": set(
''.join((A, 'ccept', sep, R, 'anges'))
for A, sep, R in product('Aa', _separators, 'Rr')),
"Content-length": set(
''.join((C, 'ontent', sep, L, 'ength'))
for C, sep, L in product('Cc', _separators, 'Ll')),
"Content-type": set(
''.join((C, 'ontent', sep, T, 'ype'))
for C, sep, T in product('Cc', _separators, 'Tt')),
"Date": set(('Date', 'date')),
"Last-modified": set(
''.join((L, 'ast', sep, M, 'odified'))
for L, sep, M in product('Ll', _separators, 'Mm')),
"Server": set(('Server', 'server'))
}
class LinkParser(HTMLParser):
"""This parser just takes an HTML page and strips out the hrefs on the
@ -59,7 +91,7 @@ def handle_starttag(self, tag, attrs):
class NonDaemonProcess(multiprocessing.Process):
"""Process tha allows sub-processes, so pools can have sub-pools."""
"""Process that allows sub-processes, so pools can have sub-pools."""
@property
def daemon(self):
return False
@ -86,25 +118,53 @@ def __init__(self, *args, **kwargs):
super(NonDaemonPool, self).__init__(*args, **kwargs)
def _read_from_url(url, accept_content_type=None):
def uses_ssl(parsed_url):
if parsed_url.scheme == 'https':
return True
if parsed_url.scheme == 's3':
endpoint_url = os.environ.get('S3_ENDPOINT_URL')
if not endpoint_url:
return True
if url_util.parse(endpoint_url, scheme='https').scheme == 'https':
return True
return False
__UNABLE_TO_VERIFY_SSL = (
lambda pyver: (
(pyver < (2, 7, 9)) or
((3,) < pyver < (3, 4, 3))
))(sys.version_info)
def read_from_url(url, accept_content_type=None):
url = url_util.parse(url)
context = None
verify_ssl = spack.config.get('config:verify_ssl')
pyver = sys.version_info
if (pyver < (2, 7, 9) or (3,) < pyver < (3, 4, 3)):
# Don't even bother with a context unless the URL scheme is one that uses
# SSL certs.
if uses_ssl(url):
if verify_ssl:
tty.warn("Spack will not check SSL certificates. You need to "
"update your Python to enable certificate "
"verification.")
elif verify_ssl:
# without a defined context, urlopen will not verify the ssl cert for
# python 3.x
context = ssl.create_default_context()
else:
context = ssl._create_unverified_context()
if __UNABLE_TO_VERIFY_SSL:
# User wants SSL verification, but it cannot be provided.
warn_no_ssl_cert_checking()
else:
# User wants SSL verification, and it *can* be provided.
context = ssl.create_default_context()
else:
# User has explicitly indicated that they do not want SSL
# verification.
context = ssl._create_unverified_context()
req = Request(url)
if accept_content_type:
req = Request(url_util.format(url))
content_type = None
is_web_url = url.scheme in ('http', 'https')
if accept_content_type and is_web_url:
# Make a HEAD request first to check the content type. This lets
# us ignore tarballs and gigantic files.
# It would be nice to do this with the HTTP Accept header to avoid
@ -113,29 +173,179 @@ def _read_from_url(url, accept_content_type=None):
req.get_method = lambda: "HEAD"
resp = _urlopen(req, timeout=_timeout, context=context)
if "Content-type" not in resp.headers:
tty.debug("ignoring page " + url)
return None, None
if not resp.headers["Content-type"].startswith(accept_content_type):
tty.debug("ignoring page " + url + " with content type " +
resp.headers["Content-type"])
return None, None
content_type = resp.headers.get('Content-type')
# Do the real GET request when we know it's just HTML.
req.get_method = lambda: "GET"
response = _urlopen(req, timeout=_timeout, context=context)
response_url = response.geturl()
# Read the page and stick it in the map we'll return
page = response.read().decode('utf-8')
if accept_content_type and not is_web_url:
content_type = response.headers.get('Content-type')
return response_url, page
reject_content_type = (
accept_content_type and (
content_type is None or
not content_type.startswith(accept_content_type)))
if reject_content_type:
tty.debug("ignoring page {0}{1}{2}".format(
url_util.format(url),
" with content type " if content_type is not None else "",
content_type or ""))
return None, None, None
return response.geturl(), response.headers, response
def read_from_url(url, accept_content_type=None):
resp_url, contents = _read_from_url(url, accept_content_type)
return contents
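read_from_url() now returns the resolved URL, the response headers, and the raw response object rather than decoded text; callers decode the stream themselves, as _spider does below with codecs. A minimal usage sketch, assuming a reachable HTML page at a placeholder URL:

# Sketch only; the URL is a placeholder.
import codecs
from spack.util.web import read_from_url

resolved_url, headers, response = read_from_url(
    'https://mirror.example.com/index.html', accept_content_type='text/html')

if response is not None:
    page = codecs.getreader('utf-8')(response).read()
    print(headers.get('Content-type'), len(page))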
def warn_no_ssl_cert_checking():
tty.warn("Spack will not check SSL certificates. You need to update "
"your Python to enable certificate verification.")
def push_to_url(local_path, remote_path, **kwargs):
keep_original = kwargs.get('keep_original', True)
local_url = url_util.parse(local_path)
local_file_path = url_util.local_file_path(local_url)
if local_file_path is None:
raise ValueError('local path must be a file:// url')
remote_url = url_util.parse(remote_path)
verify_ssl = spack.config.get('config:verify_ssl')
if __UNABLE_TO_VERIFY_SSL and verify_ssl and uses_ssl(remote_url):
warn_no_ssl_cert_checking()
remote_file_path = url_util.local_file_path(remote_url)
if remote_file_path is not None:
mkdirp(os.path.dirname(remote_file_path))
if keep_original:
shutil.copy(local_file_path, remote_file_path)
else:
try:
os.rename(local_file_path, remote_file_path)
except OSError as e:
if e.errno == errno.EXDEV:
# NOTE(opadron): The above move failed because it crosses
# filesystem boundaries. Copy the file (plus original
# metadata), and then delete the original. This operation
# needs to be done in separate steps.
shutil.copy2(local_file_path, remote_file_path)
os.remove(local_file_path)
elif remote_url.scheme == 's3':
extra_args = kwargs.get('extra_args', {})
remote_path = remote_url.path
while remote_path.startswith('/'):
remote_path = remote_path[1:]
s3 = s3_util.create_s3_session(remote_url)
s3.upload_file(local_file_path, remote_url.netloc,
remote_path, ExtraArgs=extra_args)
if not keep_original:
os.remove(local_file_path)
else:
raise NotImplementedError(
'Unrecognized URL scheme: {SCHEME}'.format(
SCHEME=remote_url.scheme))
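push_to_url() accepts both filesystem and S3 destinations, and keep_original=False turns the copy into a move. A hedged usage sketch; the paths, bucket, and mirror layout below are placeholders:

# Sketch only; paths, bucket, and mirror layout are placeholders.
from spack.util.web import push_to_url

# Copy into a mirror laid out on the local filesystem.
push_to_url('/tmp/pkg-1.0.tar.gz', 'file:///srv/mirror/pkg-1.0.tar.gz')

# Upload to an S3-backed mirror, removing the local copy afterwards.
push_to_url('/tmp/pkg-1.0.tar.gz',
            's3://my-mirror-bucket/build_cache/pkg-1.0.tar.gz',
            keep_original=False)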
def url_exists(url):
url = url_util.parse(url)
local_path = url_util.local_file_path(url)
if local_path:
return os.path.exists(local_path)
if url.scheme == 's3':
s3 = s3_util.create_s3_session(url)
from botocore.exceptions import ClientError
try:
s3.get_object(Bucket=url.netloc, Key=url.path)
return True
except ClientError as err:
if err.response['Error']['Code'] == 'NoSuchKey':
return False
raise err
# otherwise, just try to "read" from the URL, and assume that *any*
# non-throwing response contains the resource represented by the URL
try:
read_from_url(url)
return True
except URLError:
return False
def remove_url(url):
url = url_util.parse(url)
local_path = url_util.local_file_path(url)
if local_path:
os.remove(local_path)
return
if url.scheme == 's3':
s3 = s3_util.create_s3_session(url)
s3.delete_object(Bucket=url.s3_bucket, Key=url.path)
return
# Don't even try for other URL schemes.
def _list_s3_objects(client, url, num_entries, start_after=None):
list_args = dict(
Bucket=url.netloc,
Prefix=url.path,
MaxKeys=num_entries)
if start_after is not None:
list_args['StartAfter'] = start_after
result = client.list_objects_v2(**list_args)
last_key = None
if result['IsTruncated']:
last_key = result['Contents'][-1]['Key']
iter = (key for key in
(
os.path.relpath(entry['Key'], url.path)
for entry in result['Contents']
)
if key != '.')
return iter, last_key
def _iter_s3_prefix(client, url, num_entries=1024):
key = None
while True:
contents, key = _list_s3_objects(
client, url, num_entries, start_after=key)
for x in contents:
yield x
if not key:
break
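_iter_s3_prefix() pages through a bucket listing by passing the last key of each truncated response as StartAfter for the next call. The same pagination pattern, sketched standalone with boto3 (bucket and prefix are placeholders):

# Standalone sketch of the pagination pattern; not part of the diff.
import boto3

def iter_keys(bucket, prefix, page_size=1024):
    client = boto3.client('s3')
    start_after = None
    while True:
        kwargs = dict(Bucket=bucket, Prefix=prefix, MaxKeys=page_size)
        if start_after is not None:
            kwargs['StartAfter'] = start_after
        result = client.list_objects_v2(**kwargs)
        for entry in result.get('Contents', []):
            yield entry['Key']
        if not result.get('IsTruncated'):
            break
        start_after = result['Contents'][-1]['Key']

for key in iter_keys('my-mirror-bucket', 'build_cache/'):
    print(key)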
def list_url(url):
url = url_util.parse(url)
local_path = url_util.local_file_path(url)
if local_path:
return os.listdir(local_path)
if url.scheme == 's3':
s3 = s3_util.create_s3_session(url)
return list(set(
key.split('/', 1)[0]
for key in _iter_s3_prefix(s3, url)))
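Together, url_exists(), list_url(), and remove_url() give mirror code a uniform, directory-like view over local paths and S3 prefixes. A hedged sketch of the intended calling convention; the bucket and object names are placeholders:

# Sketch only; bucket and object names are placeholders.
from spack.util.web import url_exists, list_url, remove_url

mirror = 's3://my-mirror-bucket/build_cache'

if url_exists(mirror + '/index.html'):
    print(list_url(mirror))                  # top-level entries under the prefix
    remove_url(mirror + '/stale.spec.yaml')  # delete a single object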
def _spider(url, visited, root, depth, max_depth, raise_on_error):
@@ -154,16 +364,12 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error):
pages = {} # dict from page URL -> text content.
links = set() # set of all links seen on visited pages.
# root may end with index.html -- chop that off.
if root.endswith('/index.html'):
root = re.sub('/index.html$', '', root)
try:
response_url, page = _read_from_url(url, 'text/html')
if not response_url or not page:
response_url, _, response = read_from_url(url, 'text/html')
if not response_url or not response:
return pages, links
page = codecs.getreader('utf-8')(response).read()
pages[response_url] = page
# Parse out the links in the page
@@ -173,8 +379,10 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error):
while link_parser.links:
raw_link = link_parser.links.pop()
abs_link = urljoin(response_url, raw_link.strip())
abs_link = url_util.join(
response_url,
raw_link.strip(),
resolve_href=True)
links.add(abs_link)
# Skip stuff that looks like an archive
@@ -243,16 +451,28 @@ def _spider_wrapper(args):
return _spider(*args)
def _urlopen(*args, **kwargs):
def _urlopen(req, *args, **kwargs):
"""Wrapper for compatibility with old versions of Python."""
# We don't pass 'context' parameter to urlopen because it
# was introduces only starting versions 2.7.9 and 3.4.3 of Python.
if 'context' in kwargs and kwargs['context'] is None:
url = req
try:
url = url.get_full_url()
except AttributeError:
pass
# We don't pass 'context' parameter because it was only introduced starting
# with versions 2.7.9 and 3.4.3 of Python.
if 'context' in kwargs:
del kwargs['context']
return urlopen(*args, **kwargs)
opener = urlopen
if url_util.parse(url).scheme == 's3':
import spack.s3_handler
opener = spack.s3_handler.open
return opener(req, *args, **kwargs)
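_urlopen() now dispatches on the URL scheme, routing s3:// requests through spack.s3_handler.open and everything else through urllib. A hedged illustration, assuming the vendored six is importable and that the handler returns a file-like response; the bucket is a placeholder:

# Sketch only; the bucket is a placeholder.
from six.moves.urllib.request import Request
from spack.util.web import _urlopen

response = _urlopen(Request('s3://my-mirror-bucket/build_cache/index.html'))
print(response.read()[:80])  # assumes a urllib-style, file-like response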
def spider(root_url, depth=0):
def spider(root, depth=0):
"""Gets web pages from a root URL.
If depth is specified (e.g., depth=2), then this will also follow
@@ -262,7 +482,9 @@ def spider(root_url, depth=0):
performance over a sequential fetch.
"""
pages, links = _spider(root_url, set(), root_url, 0, depth, False)
root = url_util.parse(root)
pages, links = _spider(root, set(), root, 0, depth, False)
return pages, links
@@ -356,99 +578,112 @@ def find_versions_of_archive(archive_urls, list_url=None, list_depth=0):
return versions
def get_checksums_for_versions(
url_dict, name, first_stage_function=None, keep_stage=False):
"""Fetches and checksums archives from URLs.
def standardize_header_names(headers):
"""Replace certain header names with standardized spellings.
This function is called by both ``spack checksum`` and ``spack
create``. The ``first_stage_function`` argument allows the caller to
inspect the first downloaded archive, e.g., to determine the build
system.
Standardizes the spellings of the following header names:
- Accept-ranges
- Content-length
- Content-type
- Date
- Last-modified
- Server
Args:
url_dict (dict): A dictionary of the form: version -> URL
name (str): The name of the package
first_stage_function (callable): function that takes a Stage and a URL;
this is run on the stage of the first URL downloaded
keep_stage (bool): whether to keep staging area when command completes
Every name considered is translated to one of the above names if the only
difference between the two is how the first letters of each word are
capitalized; whether words are separated; or, if separated, whether they
are so by a dash (-), underscore (_), or space ( ). Header names that
cannot be mapped as described above are returned unaltered.
Returns:
(str): A multi-line string containing versions and corresponding hashes
For example: The standard spelling of "Content-length" would be substituted
for any of the following names:
- Content-length
- content_length
- contentlength
- content_Length
- contentLength
- content Length
... and any other header name, such as "Content-encoding", would not be
altered, regardless of spelling.
If headers is a string, then it (or an appropriate substitute) is returned.
If headers is a non-empty tuple, headers[0] is a string, and there exists a
standardized spelling for headers[0] that differs from it, then a new tuple
is returned. This tuple has the same elements as headers, except the first
element is the standardized spelling for headers[0].
If headers is a sequence, then a new list is considered, where each element
is its corresponding element in headers, but mapped as above if a string or
tuple. This new list is returned if at least one of its elements differs
from its corresponding element in headers.
If headers is a mapping, then a new dict is considered, where the key in
each item is the key of its corresponding item in headers, mapped as above
if a string or tuple. The value is taken from the corresponding item. If
the keys of multiple items in headers map to the same key after being
standardized, then the value for the resulting item is undefined. The new
dict is returned if at least one of its items has a key that differs from
that of their corresponding item in headers, or if the keys of multiple
items in headers map to the same key after being standardized.
In all other cases headers is returned unaltered.
"""
sorted_versions = sorted(url_dict.keys(), reverse=True)
if isinstance(headers, six.string_types):
for standardized_spelling, other_spellings in (
HTTP_HEADER_NAME_ALIASES.items()):
if headers in other_spellings:
if headers == standardized_spelling:
return headers
return standardized_spelling
return headers
# Find length of longest string in the list for padding
max_len = max(len(str(v)) for v in sorted_versions)
num_ver = len(sorted_versions)
if isinstance(headers, tuple):
if not headers:
return headers
old = headers[0]
if isinstance(old, six.string_types):
new = standardize_header_names(old)
if old is not new:
return (new,) + headers[1:]
return headers
tty.msg("Found {0} version{1} of {2}:".format(
num_ver, '' if num_ver == 1 else 's', name),
"",
*spack.cmd.elide_list(
["{0:{1}} {2}".format(str(v), max_len, url_dict[v])
for v in sorted_versions]))
print()
try:
changed = False
new_dict = {}
for key, value in headers.items():
if isinstance(key, (tuple, six.string_types)):
old_key, key = key, standardize_header_names(key)
changed = changed or key is not old_key
archives_to_fetch = tty.get_number(
"How many would you like to checksum?", default=1, abort='q')
new_dict[key] = value
if not archives_to_fetch:
tty.die("Aborted.")
return new_dict if changed else headers
except (AttributeError, TypeError, ValueError):
pass
versions = sorted_versions[:archives_to_fetch]
urls = [url_dict[v] for v in versions]
try:
changed = False
new_list = []
for item in headers:
if isinstance(item, (tuple, six.string_types)):
old_item, item = item, standardize_header_names(item)
changed = changed or item is not old_item
tty.msg("Downloading...")
version_hashes = []
i = 0
for url, version in zip(urls, versions):
try:
with spack.stage.Stage(url, keep=keep_stage) as stage:
# Fetch the archive
stage.fetch()
if i == 0 and first_stage_function:
# Only run first_stage_function the first time,
# no need to run it every time
first_stage_function(stage, url)
new_list.append(item)
# Checksum the archive and add it to the list
version_hashes.append((version, spack.util.crypto.checksum(
hashlib.sha256, stage.archive_file)))
i += 1
except spack.stage.FailedDownloadError:
tty.msg("Failed to fetch {0}".format(url))
except Exception as e:
tty.msg("Something failed on {0}, skipping.".format(url),
" ({0})".format(e))
return new_list if changed else headers
except TypeError:
pass
if not version_hashes:
tty.die("Could not fetch any versions for {0}".format(name))
# Find length of longest string in the list for padding
max_len = max(len(str(v)) for v, h in version_hashes)
# Generate the version directives to put in a package.py
version_lines = "\n".join([
" version('{0}', {1}sha256='{2}')".format(
v, ' ' * (max_len - len(str(v))), h) for v, h in version_hashes
])
num_hash = len(version_hashes)
tty.msg("Checksummed {0} version{1} of {2}".format(
num_hash, '' if num_hash == 1 else 's', name))
return version_lines
return headers
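The dispatch above covers strings, tuples, mappings, and other sequences. A few hedged doctest-style checks of the mapping the docstring describes, assuming Spack's modules are importable:

# Illustration only.
from spack.util.web import standardize_header_names

assert standardize_header_names('content_length') == 'Content-length'
assert standardize_header_names('Content-encoding') == 'Content-encoding'  # unknown names pass through
assert standardize_header_names(('contentType', 'text/html')) == ('Content-type', 'text/html')
assert standardize_header_names({'accept ranges': 'bytes'}) == {'Accept-ranges': 'bytes'}
assert standardize_header_names(['last_Modified', 'Server']) == ['Last-modified', 'Server']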
class SpackWebError(spack.error.SpackError):
"""Superclass for Spack web spidering errors."""
class VersionFetchError(SpackWebError):
"""Raised when we can't determine a URL to fetch a package."""
class NoNetworkConnectionError(SpackWebError):
"""Raised when an operation can't get an internet connection."""
def __init__(self, message, url):