fetching: S3 upload and download (#11117)

This extends Spack so that it can fetch sources and binaries from, push sources and binaries to, and index the contents of mirrors hosted in an S3 bucket.

High level to-do list:

- [x] Extend mirrors configuration to add support for `file://` and `s3://` URLs (see the example config after this list).
- [x] Ensure all fetching, pushing, and indexing operations work for `file://` URLs.
- [x] Implement S3 source fetching
- [x] Implement S3 binary mirror indexing
- [x] Implement S3 binary package fetching
- [x] Implement S3 source pushing
- [x] Implement S3 binary package pushing
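With this in place, a `mirrors.yaml` can mix local and S3 mirrors. A minimal example (the bucket name and paths are made up):

```yaml
mirrors:
  local_filesystem: file:///home/me/spack-mirror
  remote_s3: s3://my-spack-bucket/mirror
```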

Important details:

* refactor URL handling so that S3 URLs and mirror URLs are handled more gracefully.
  - `parse()` now accepts already-parsed URL objects; an equivalent object is returned
    with any extra S3-related attributes intact.  Objects created with urllib can also
    be passed, and the additional S3 handling logic is still applied (see the sketch below).
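A minimal sketch of the idempotent-`parse()` idea, using plain `urllib` rather than the actual Spack helper:

```python
from urllib.parse import ParseResult, urlparse

def parse(url, scheme="file"):
    # Accept either a URL string or an already-parsed result; parsing an
    # already-parsed object is a no-op that preserves all attributes.
    if isinstance(url, ParseResult):
        return url
    return urlparse(url, scheme=scheme)

parsed = parse("s3://my-bucket/build_cache")  # bucket name is hypothetical
assert parse(parsed) is parsed                # safe to parse twice
assert parse("/tmp/mirror").scheme == "file"  # scheme defaults to file
```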

* update mirror schema/parsing: a mirror can now have separate fetch/push URLs (see the example after this list)
* implement an S3 fetch strategy and several supporting utility changes
* provide more feature-complete S3 fetching
* update the `buildcache create` command to support S3
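Under the updated schema, a mirror entry can be either a plain URL string or a `fetch`/`push` pair, for example (URLs are hypothetical):

```yaml
mirrors:
  simple: s3://my-spack-bucket/mirror
  split:
    fetch: https://mirror.example.com/spack
    push: s3://my-spack-bucket/mirror
```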

* Move the core logic for reading data from S3 out of the S3 fetch strategy and into
  the S3 URL handler.  The S3 fetch strategy now calls into `read_from_url()`.  Since
  `read_from_url()` can handle S3 URLs, the S3 fetch strategy is redundant.  It's not
  clear whether the ideal design is to have S3 fetching functionality in a fetch
  strategy, implemented directly in `read_from_url()`, or both (a usage sketch follows below).
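Judging from the call sites in this diff, `read_from_url()` returns a `(url, headers, stream)` triple for any supported scheme, and callers decode the byte stream themselves. Roughly (the bucket and path are made up):

```python
import codecs

import spack.util.web as web_util

# The same call works for http(s)://, file://, and s3:// URLs after this change.
url, headers, stream = web_util.read_from_url("s3://my-bucket/build_cache/index.html")
text = codecs.getreader("utf-8")(stream).read()
```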

* expanded what can be passed to `spack buildcache` via the `-d` flag: in addition
  to a directory on the local filesystem, the name of a configured mirror or a push
  URL can be passed directly (examples below).
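For example (the mirror name, paths, and bucket below are made up), all of the following now resolve to a push location:

```console
$ spack buildcache create -d /path/to/local/mirror <spec>        # local directory
$ spack buildcache create -d my-mirror <spec>                    # name of a configured mirror
$ spack buildcache create -d s3://my-spack-bucket/mirror <spec>  # push URL, given directly
$ spack buildcache update-index -d s3://my-spack-bucket/mirror   # regenerate index.html
```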
Authored by Omar Padron on 2019-10-22 03:32:04 -04:00, committed by Todd Gamblin
parent 6cb972a9d2
commit fd58c98b0e
21 changed files with 1411 additions and 280 deletions

View file

@@ -1,3 +1,8 @@
+# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
+# Spack Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
 generate ci jobs:
   script:
     - "./bin/generate-gitlab-ci-yml.sh"

View file

@@ -10,4 +10,4 @@ set -x
 SPACK_BIN_DIR="${CI_PROJECT_DIR}/bin"
 export PATH="${SPACK_BIN_DIR}:${PATH}"
-spack upload-s3 index
+spack buildcache update-index -d "$MIRROR_URL"

View file

@ -3,6 +3,7 @@
# #
# SPDX-License-Identifier: (Apache-2.0 OR MIT) # SPDX-License-Identifier: (Apache-2.0 OR MIT)
import codecs
import os import os
import re import re
import tarfile import tarfile
@ -23,14 +24,32 @@
import spack.util.gpg as gpg_util import spack.util.gpg as gpg_util
import spack.relocate as relocate import spack.relocate as relocate
import spack.util.spack_yaml as syaml import spack.util.spack_yaml as syaml
import spack.mirror
import spack.util.url as url_util
import spack.util.web as web_util
from spack.spec import Spec from spack.spec import Spec
from spack.stage import Stage from spack.stage import Stage
from spack.util.gpg import Gpg from spack.util.gpg import Gpg
from spack.util.web import spider, read_from_url
from spack.util.executable import ProcessError from spack.util.executable import ProcessError
_build_cache_relative_path = 'build_cache' _build_cache_relative_path = 'build_cache'
BUILD_CACHE_INDEX_TEMPLATE = '''
<html>
<head>
<title>{title}</title>
</head>
<body>
<ul>
{path_list}
</ul>
</body>
</html>
'''
BUILD_CACHE_INDEX_ENTRY_TEMPLATE = ' <li><a href="{path}">{path}</a></li>'
class NoOverwriteException(Exception): class NoOverwriteException(Exception):
""" """
@ -101,7 +120,7 @@ def build_cache_relative_path():
return _build_cache_relative_path return _build_cache_relative_path
def build_cache_directory(prefix): def build_cache_prefix(prefix):
return os.path.join(prefix, build_cache_relative_path()) return os.path.join(prefix, build_cache_relative_path())
@ -246,29 +265,36 @@ def sign_tarball(key, force, specfile_path):
Gpg.sign(key, specfile_path, '%s.asc' % specfile_path) Gpg.sign(key, specfile_path, '%s.asc' % specfile_path)
def _generate_html_index(path_list, output_path): def generate_package_index(cache_prefix):
f = open(output_path, 'w') """Create the build cache index page.
header = """<html>\n
<head>\n</head>\n
<list>\n"""
footer = "</list>\n</html>\n"
f.write(header)
for path in path_list:
rel = os.path.basename(path)
f.write('<li><a href="%s"> %s</a>\n' % (rel, rel))
f.write(footer)
f.close()
Creates (or replaces) the "index.html" page at the location given in
cache_prefix. This page contains a link for each binary package (*.yaml)
and signing key (*.key) under cache_prefix.
"""
tmpdir = tempfile.mkdtemp()
try:
index_html_path = os.path.join(tmpdir, 'index.html')
file_list = (
entry
for entry in web_util.list_url(cache_prefix)
if (entry.endswith('.yaml')
or entry.endswith('.key')))
def generate_package_index(build_cache_dir): with open(index_html_path, 'w') as f:
yaml_list = os.listdir(build_cache_dir) f.write(BUILD_CACHE_INDEX_TEMPLATE.format(
path_list = [os.path.join(build_cache_dir, l) for l in yaml_list] title='Spack Package Index',
path_list='\n'.join(
BUILD_CACHE_INDEX_ENTRY_TEMPLATE.format(path=path)
for path in file_list)))
index_html_path_tmp = os.path.join(build_cache_dir, 'index.html.tmp') web_util.push_to_url(
index_html_path = os.path.join(build_cache_dir, 'index.html') index_html_path,
url_util.join(cache_prefix, 'index.html'),
_generate_html_index(path_list, index_html_path_tmp) keep_original=False,
shutil.move(index_html_path_tmp, index_html_path) extra_args={'ContentType': 'text/html'})
finally:
shutil.rmtree(tmpdir)
def build_tarball(spec, outdir, force=False, rel=False, unsigned=False, def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
@ -281,33 +307,41 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
raise ValueError('spec must be concrete to build tarball') raise ValueError('spec must be concrete to build tarball')
# set up some paths # set up some paths
build_cache_dir = build_cache_directory(outdir) tmpdir = tempfile.mkdtemp()
cache_prefix = build_cache_prefix(tmpdir)
tarfile_name = tarball_name(spec, '.tar.gz') tarfile_name = tarball_name(spec, '.tar.gz')
tarfile_dir = os.path.join(build_cache_dir, tarfile_dir = os.path.join(cache_prefix, tarball_directory_name(spec))
tarball_directory_name(spec))
tarfile_path = os.path.join(tarfile_dir, tarfile_name) tarfile_path = os.path.join(tarfile_dir, tarfile_name)
mkdirp(tarfile_dir)
spackfile_path = os.path.join( spackfile_path = os.path.join(
build_cache_dir, tarball_path_name(spec, '.spack')) cache_prefix, tarball_path_name(spec, '.spack'))
if os.path.exists(spackfile_path):
remote_spackfile_path = url_util.join(
outdir, os.path.relpath(spackfile_path, tmpdir))
mkdirp(tarfile_dir)
if web_util.url_exists(remote_spackfile_path):
if force: if force:
os.remove(spackfile_path) web_util.remove_url(remote_spackfile_path)
else: else:
raise NoOverwriteException(str(spackfile_path)) raise NoOverwriteException(url_util.format(remote_spackfile_path))
# need to copy the spec file so the build cache can be downloaded # need to copy the spec file so the build cache can be downloaded
# without concretizing with the current spack packages # without concretizing with the current spack packages
# and preferences # and preferences
spec_file = os.path.join(spec.prefix, ".spack", "spec.yaml") spec_file = os.path.join(spec.prefix, ".spack", "spec.yaml")
specfile_name = tarball_name(spec, '.spec.yaml') specfile_name = tarball_name(spec, '.spec.yaml')
specfile_path = os.path.realpath( specfile_path = os.path.realpath(
os.path.join(build_cache_dir, specfile_name)) os.path.join(cache_prefix, specfile_name))
if os.path.exists(specfile_path): remote_specfile_path = url_util.join(
outdir, os.path.relpath(specfile_path, os.path.realpath(tmpdir)))
if web_util.url_exists(remote_specfile_path):
if force: if force:
os.remove(specfile_path) web_util.remove_url(remote_specfile_path)
else: else:
raise NoOverwriteException(str(specfile_path)) raise NoOverwriteException(url_util.format(remote_specfile_path))
# make a copy of the install directory to work with # make a copy of the install directory to work with
workdir = os.path.join(tempfile.mkdtemp(), os.path.basename(spec.prefix)) workdir = os.path.join(tempfile.mkdtemp(), os.path.basename(spec.prefix))
@ -324,6 +358,7 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
except Exception as e: except Exception as e:
shutil.rmtree(workdir) shutil.rmtree(workdir)
shutil.rmtree(tarfile_dir) shutil.rmtree(tarfile_dir)
shutil.rmtree(tmpdir)
tty.die(e) tty.die(e)
else: else:
try: try:
@ -331,7 +366,9 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
except Exception as e: except Exception as e:
shutil.rmtree(workdir) shutil.rmtree(workdir)
shutil.rmtree(tarfile_dir) shutil.rmtree(tarfile_dir)
shutil.rmtree(tmpdir)
tty.die(e) tty.die(e)
# create compressed tarball of the install prefix # create compressed tarball of the install prefix
with closing(tarfile.open(tarfile_path, 'w:gz')) as tar: with closing(tarfile.open(tarfile_path, 'w:gz')) as tar:
tar.add(name='%s' % workdir, tar.add(name='%s' % workdir,
@ -360,7 +397,9 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
spec_dict['full_hash'] = spec.full_hash() spec_dict['full_hash'] = spec.full_hash()
tty.debug('The full_hash ({0}) of {1} will be written into {2}'.format( tty.debug('The full_hash ({0}) of {1} will be written into {2}'.format(
spec_dict['full_hash'], spec.name, specfile_path)) spec_dict['full_hash'],
spec.name,
url_util.format(remote_specfile_path)))
tty.debug(spec.tree()) tty.debug(spec.tree())
with open(specfile_path, 'w') as outfile: with open(specfile_path, 'w') as outfile:
@ -382,9 +421,19 @@ def build_tarball(spec, outdir, force=False, rel=False, unsigned=False,
if not unsigned: if not unsigned:
os.remove('%s.asc' % specfile_path) os.remove('%s.asc' % specfile_path)
# create an index.html for the build_cache directory so specs can be found web_util.push_to_url(
if regenerate_index: spackfile_path, remote_spackfile_path, keep_original=False)
generate_package_index(build_cache_dir) web_util.push_to_url(
specfile_path, remote_specfile_path, keep_original=False)
try:
# create an index.html for the build_cache directory so specs can be
# found
if regenerate_index:
generate_package_index(url_util.join(
outdir, os.path.relpath(cache_prefix, tmpdir)))
finally:
shutil.rmtree(tmpdir)
return None return None
@ -394,13 +443,16 @@ def download_tarball(spec):
Download binary tarball for given package into stage area Download binary tarball for given package into stage area
Return True if successful Return True if successful
""" """
mirrors = spack.config.get('mirrors') if not spack.mirror.MirrorCollection():
if len(mirrors) == 0:
tty.die("Please add a spack mirror to allow " + tty.die("Please add a spack mirror to allow " +
"download of pre-compiled packages.") "download of pre-compiled packages.")
tarball = tarball_path_name(spec, '.spack') tarball = tarball_path_name(spec, '.spack')
for mirror_name, mirror_url in mirrors.items():
url = mirror_url + '/' + _build_cache_relative_path + '/' + tarball for mirror in spack.mirror.MirrorCollection().values():
url = url_util.join(
mirror.fetch_url, _build_cache_relative_path, tarball)
# stage the tarball into standard place # stage the tarball into standard place
stage = Stage(url, name="build_cache", keep=True) stage = Stage(url, name="build_cache", keep=True)
try: try:
@ -408,6 +460,7 @@ def download_tarball(spec):
return stage.save_filename return stage.save_filename
except fs.FetchError: except fs.FetchError:
continue continue
return None return None
@ -610,26 +663,29 @@ def get_specs(force=False):
tty.debug("Using previously-retrieved specs") tty.debug("Using previously-retrieved specs")
return _cached_specs return _cached_specs
mirrors = spack.config.get('mirrors') if not spack.mirror.MirrorCollection():
if len(mirrors) == 0: tty.warn("No Spack mirrors are currently configured")
tty.debug("No Spack mirrors are currently configured")
return {} return {}
urls = set() urls = set()
for mirror_name, mirror_url in mirrors.items(): for mirror in spack.mirror.MirrorCollection().values():
if mirror_url.startswith('file'): fetch_url_build_cache = url_util.join(
mirror = mirror_url.replace( mirror.fetch_url, _build_cache_relative_path)
'file://', '') + "/" + _build_cache_relative_path
tty.msg("Finding buildcaches in %s" % mirror) mirror_dir = url_util.local_file_path(fetch_url_build_cache)
if os.path.exists(mirror): if mirror_dir:
files = os.listdir(mirror) tty.msg("Finding buildcaches in %s" % mirror_dir)
if os.path.exists(mirror_dir):
files = os.listdir(mirror_dir)
for file in files: for file in files:
if re.search('spec.yaml', file): if re.search('spec.yaml', file):
link = 'file://' + mirror + '/' + file link = url_util.join(fetch_url_build_cache, file)
urls.add(link) urls.add(link)
else: else:
tty.msg("Finding buildcaches on %s" % mirror_url) tty.msg("Finding buildcaches at %s" %
p, links = spider(mirror_url + "/" + _build_cache_relative_path) url_util.format(fetch_url_build_cache))
p, links = web_util.spider(
url_util.join(fetch_url_build_cache, 'index.html'))
for link in links: for link in links:
if re.search("spec.yaml", link): if re.search("spec.yaml", link):
urls.add(link) urls.add(link)
@ -659,28 +715,33 @@ def get_keys(install=False, trust=False, force=False):
""" """
Get pgp public keys available on mirror Get pgp public keys available on mirror
""" """
mirrors = spack.config.get('mirrors') if not spack.mirror.MirrorCollection():
if len(mirrors) == 0:
tty.die("Please add a spack mirror to allow " + tty.die("Please add a spack mirror to allow " +
"download of build caches.") "download of build caches.")
keys = set() keys = set()
for mirror_name, mirror_url in mirrors.items():
if mirror_url.startswith('file'): for mirror in spack.mirror.MirrorCollection().values():
mirror = os.path.join( fetch_url_build_cache = url_util.join(
mirror_url.replace('file://', ''), _build_cache_relative_path) mirror.fetch_url, _build_cache_relative_path)
tty.msg("Finding public keys in %s" % mirror)
files = os.listdir(mirror) mirror_dir = url_util.local_file_path(fetch_url_build_cache)
if mirror_dir:
tty.msg("Finding public keys in %s" % mirror_dir)
files = os.listdir(mirror_dir)
for file in files: for file in files:
if re.search(r'\.key', file): if re.search(r'\.key', file):
link = 'file://' + mirror + '/' + file link = url_util.join(fetch_url_build_cache, file)
keys.add(link) keys.add(link)
else: else:
tty.msg("Finding public keys on %s" % mirror_url) tty.msg("Finding public keys at %s" %
p, links = spider(mirror_url + "/build_cache", depth=1) url_util.format(fetch_url_build_cache))
p, links = web_util.spider(fetch_url_build_cache, depth=1)
for link in links: for link in links:
if re.search(r'\.key', link): if re.search(r'\.key', link):
keys.add(link) keys.add(link)
for link in keys: for link in keys:
with Stage(link, name="build_cache", keep=True) as stage: with Stage(link, name="build_cache", keep=True) as stage:
if os.path.exists(stage.save_filename) and force: if os.path.exists(stage.save_filename) and force:
@ -717,15 +778,16 @@ def needs_rebuild(spec, mirror_url, rebuild_on_errors=False):
# Try to retrieve the .spec.yaml directly, based on the known # Try to retrieve the .spec.yaml directly, based on the known
# format of the name, in order to determine if the package # format of the name, in order to determine if the package
# needs to be rebuilt. # needs to be rebuilt.
build_cache_dir = build_cache_directory(mirror_url) cache_prefix = build_cache_prefix(mirror_url)
spec_yaml_file_name = tarball_name(spec, '.spec.yaml') spec_yaml_file_name = tarball_name(spec, '.spec.yaml')
file_path = os.path.join(build_cache_dir, spec_yaml_file_name) file_path = os.path.join(cache_prefix, spec_yaml_file_name)
result_of_error = 'Package ({0}) will {1}be rebuilt'.format( result_of_error = 'Package ({0}) will {1}be rebuilt'.format(
spec.short_spec, '' if rebuild_on_errors else 'not ') spec.short_spec, '' if rebuild_on_errors else 'not ')
try: try:
yaml_contents = read_from_url(file_path) _, _, yaml_file = web_util.read_from_url(file_path)
yaml_contents = codecs.getreader('utf-8')(yaml_file).read()
except URLError as url_err: except URLError as url_err:
err_msg = [ err_msg = [
'Unable to determine whether {0} needs rebuilding,', 'Unable to determine whether {0} needs rebuilding,',
@ -782,22 +844,22 @@ def check_specs_against_mirrors(mirrors, specs, output_file=None,
""" """
rebuilds = {} rebuilds = {}
for mirror_name, mirror_url in mirrors.items(): for mirror in spack.mirror.MirrorCollection(mirrors).values():
tty.msg('Checking for built specs at %s' % mirror_url) tty.msg('Checking for built specs at %s' % mirror.fetch_url)
rebuild_list = [] rebuild_list = []
for spec in specs: for spec in specs:
if needs_rebuild(spec, mirror_url, rebuild_on_errors): if needs_rebuild(spec, mirror.fetch_url, rebuild_on_errors):
rebuild_list.append({ rebuild_list.append({
'short_spec': spec.short_spec, 'short_spec': spec.short_spec,
'hash': spec.dag_hash() 'hash': spec.dag_hash()
}) })
if rebuild_list: if rebuild_list:
rebuilds[mirror_url] = { rebuilds[mirror.fetch_url] = {
'mirrorName': mirror_name, 'mirrorName': mirror.name,
'mirrorUrl': mirror_url, 'mirrorUrl': mirror.fetch_url,
'rebuildSpecs': rebuild_list 'rebuildSpecs': rebuild_list
} }
@ -810,33 +872,36 @@ def check_specs_against_mirrors(mirrors, specs, output_file=None,
def _download_buildcache_entry(mirror_root, descriptions): def _download_buildcache_entry(mirror_root, descriptions):
for description in descriptions: for description in descriptions:
url = os.path.join(mirror_root, description['url']) description_url = os.path.join(mirror_root, description['url'])
path = description['path'] path = description['path']
fail_if_missing = description['required'] fail_if_missing = description['required']
mkdirp(path) mkdirp(path)
stage = Stage(url, name="build_cache", path=path, keep=True) stage = Stage(
description_url, name="build_cache", path=path, keep=True)
try: try:
stage.fetch() stage.fetch()
except fs.FetchError as e: except fs.FetchError as e:
tty.debug(e) tty.debug(e)
if fail_if_missing: if fail_if_missing:
tty.error('Failed to download required url {0}'.format(url)) tty.error('Failed to download required url {0}'.format(
description_url))
return False return False
return True return True
def download_buildcache_entry(file_descriptions): def download_buildcache_entry(file_descriptions):
mirrors = spack.config.get('mirrors') if not spack.mirror.MirrorCollection():
if len(mirrors) == 0:
tty.die("Please add a spack mirror to allow " + tty.die("Please add a spack mirror to allow " +
"download of buildcache entries.") "download of buildcache entries.")
for mirror_name, mirror_url in mirrors.items(): for mirror in spack.mirror.MirrorCollection().values():
mirror_root = os.path.join(mirror_url, _build_cache_relative_path) mirror_root = os.path.join(
mirror.fetch_url,
_build_cache_relative_path)
if _download_buildcache_entry(mirror_root, file_descriptions): if _download_buildcache_entry(mirror_root, file_descriptions):
return True return True

View file

@ -9,11 +9,13 @@
import llnl.util.lang import llnl.util.lang
from llnl.util.filesystem import mkdirp from llnl.util.filesystem import mkdirp
import spack.error
import spack.paths import spack.paths
import spack.config import spack.config
import spack.fetch_strategy import spack.fetch_strategy
import spack.util.file_cache import spack.util.file_cache
from spack.util.path import canonicalize_path import spack.util.path
import spack.util.url as url_util
def _misc_cache(): def _misc_cache():
@ -25,7 +27,7 @@ def _misc_cache():
path = spack.config.get('config:misc_cache') path = spack.config.get('config:misc_cache')
if not path: if not path:
path = os.path.join(spack.paths.user_config_path, 'cache') path = os.path.join(spack.paths.user_config_path, 'cache')
path = canonicalize_path(path) path = spack.util.path.canonicalize_path(path)
return spack.util.file_cache.FileCache(path) return spack.util.file_cache.FileCache(path)
@ -43,22 +45,26 @@ def _fetch_cache():
path = spack.config.get('config:source_cache') path = spack.config.get('config:source_cache')
if not path: if not path:
path = os.path.join(spack.paths.var_path, "cache") path = os.path.join(spack.paths.var_path, "cache")
path = canonicalize_path(path) path = spack.util.path.canonicalize_path(path)
return spack.fetch_strategy.FsCache(path) return spack.fetch_strategy.FsCache(path)
class MirrorCache(object): class MirrorCache(object):
def __init__(self, root): def __init__(self, root):
self.root = os.path.abspath(root) self.root = url_util.local_file_path(root)
if not self.root:
raise spack.error.SpackError(
'MirrorCaches only work with file:// URLs')
self.new_resources = set() self.new_resources = set()
self.existing_resources = set() self.existing_resources = set()
def store(self, fetcher, relative_dest): def store(self, fetcher, relative_dest):
# Note this will archive package sources even if they would not # Note this will archive package sources even if they would not
# normally be cached (e.g. the current tip of an hg/git branch) # normally be cached (e.g. the current tip of an hg/git branch)
dst = os.path.join(self.root, relative_dest) dst = os.path.join(self.root, relative_dest)
if os.path.exists(dst): if os.path.exists(dst):
self.existing_resources.add(relative_dest) self.existing_resources.add(relative_dest)
else: else:

View file

@ -14,6 +14,7 @@
import spack.cmd.common.arguments as arguments import spack.cmd.common.arguments as arguments
import spack.environment as ev import spack.environment as ev
import spack.hash_types as ht import spack.hash_types as ht
import spack.mirror
import spack.relocate import spack.relocate
import spack.repo import spack.repo
import spack.spec import spack.spec
@ -21,6 +22,8 @@
import spack.config import spack.config
import spack.repo import spack.repo
import spack.store import spack.store
import spack.util.url as url_util
from spack.error import SpecError from spack.error import SpecError
from spack.spec import Spec, save_dependency_spec_yamls from spack.spec import Spec, save_dependency_spec_yamls
@ -205,6 +208,13 @@ def setup_parser(subparser):
help='Destination mirror url') help='Destination mirror url')
copy.set_defaults(func=buildcache_copy) copy.set_defaults(func=buildcache_copy)
# Update buildcache index without copying any additional packages
update_index = subparsers.add_parser(
'update-index', help=buildcache_update_index.__doc__)
update_index.add_argument(
'-d', '--mirror-url', default=None, help='Destination mirror url')
update_index.set_defaults(func=buildcache_update_index)
def find_matching_specs(pkgs, allow_multiple_matches=False, env=None): def find_matching_specs(pkgs, allow_multiple_matches=False, env=None):
"""Returns a list of specs matching the not necessarily """Returns a list of specs matching the not necessarily
@ -312,9 +322,14 @@ def createtarball(args):
" yaml file containing a spec to install") " yaml file containing a spec to install")
pkgs = set(packages) pkgs = set(packages)
specs = set() specs = set()
outdir = '.' outdir = '.'
if args.directory: if args.directory:
outdir = args.directory outdir = args.directory
mirror = spack.mirror.MirrorCollection().lookup(outdir)
outdir = url_util.format(mirror.push_url)
signkey = None signkey = None
if args.key: if args.key:
signkey = args.key signkey = args.key
@ -649,6 +664,19 @@ def buildcache_copy(args):
shutil.copyfile(cdashid_src_path, cdashid_dest_path) shutil.copyfile(cdashid_src_path, cdashid_dest_path)
def buildcache_update_index(args):
"""Update a buildcache index."""
outdir = '.'
if args.mirror_url:
outdir = args.mirror_url
mirror = spack.mirror.MirrorCollection().lookup(outdir)
outdir = url_util.format(mirror.push_url)
bindist.generate_package_index(
url_util.join(outdir, bindist.build_cache_relative_path()))
def buildcache(parser, args): def buildcache(parser, args):
if args.func: if args.func:
args.func(args) args.func(args)

View file

@ -11,8 +11,8 @@
import spack.cmd import spack.cmd
import spack.repo import spack.repo
import spack.stage
import spack.util.crypto import spack.util.crypto
import spack.util.web
from spack.util.naming import valid_fully_qualified_module_name from spack.util.naming import valid_fully_qualified_module_name
from spack.version import ver, Version from spack.version import ver, Version
@ -56,7 +56,7 @@ def checksum(parser, args):
if not url_dict: if not url_dict:
tty.die("Could not find any versions for {0}".format(pkg.name)) tty.die("Could not find any versions for {0}".format(pkg.name))
version_lines = spack.util.web.get_checksums_for_versions( version_lines = spack.stage.get_checksums_for_versions(
url_dict, pkg.name, keep_stage=args.keep_stage) url_dict, pkg.name, keep_stage=args.keep_stage)
print() print()

View file

@ -13,6 +13,7 @@
import spack.util.web import spack.util.web
import spack.repo import spack.repo
import spack.stage
from spack.spec import Spec from spack.spec import Spec
from spack.util.editor import editor from spack.util.editor import editor
from spack.util.executable import which, ProcessError from spack.util.executable import which, ProcessError
@ -618,7 +619,7 @@ def get_versions(args, name):
version = parse_version(args.url) version = parse_version(args.url)
url_dict = {version: args.url} url_dict = {version: args.url}
versions = spack.util.web.get_checksums_for_versions( versions = spack.stage.get_checksums_for_versions(
url_dict, name, first_stage_function=guesser, url_dict, name, first_stage_function=guesser,
keep_stage=args.keep_stage) keep_stage=args.keep_stage)
else: else:

View file

@ -4,20 +4,21 @@
# SPDX-License-Identifier: (Apache-2.0 OR MIT) # SPDX-License-Identifier: (Apache-2.0 OR MIT)
import sys import sys
import os
from datetime import datetime
import argparse import argparse
import llnl.util.tty as tty import llnl.util.tty as tty
from llnl.util.tty.colify import colify from llnl.util.tty.colify import colify
import spack.cmd import spack.cmd
import spack.cmd.common.arguments as arguments
import spack.concretize import spack.concretize
import spack.config import spack.config
import spack.environment as ev
import spack.mirror import spack.mirror
import spack.repo import spack.repo
import spack.cmd.common.arguments as arguments import spack.util.url as url_util
import spack.environment as ev import spack.util.web as web_util
from spack.spec import Spec from spack.spec import Spec
from spack.error import SpackError from spack.error import SpackError
from spack.util.spack_yaml import syaml_dict from spack.util.spack_yaml import syaml_dict
@ -73,6 +74,19 @@ def setup_parser(subparser):
default=spack.config.default_modify_scope(), default=spack.config.default_modify_scope(),
help="configuration scope to modify") help="configuration scope to modify")
# Set-Url
set_url_parser = sp.add_parser('set-url', help=mirror_set_url.__doc__)
set_url_parser.add_argument('name', help="mnemonic name for mirror")
set_url_parser.add_argument(
'url', help="url of mirror directory from 'spack mirror create'")
set_url_parser.add_argument(
'--push', action='store_true',
help="set only the URL used for uploading new packages")
set_url_parser.add_argument(
'--scope', choices=scopes, metavar=scopes_metavar,
default=spack.config.default_modify_scope(),
help="configuration scope to modify")
# List # List
list_parser = sp.add_parser('list', help=mirror_list.__doc__) list_parser = sp.add_parser('list', help=mirror_list.__doc__)
list_parser.add_argument( list_parser.add_argument(
@ -83,20 +97,14 @@ def setup_parser(subparser):
def mirror_add(args): def mirror_add(args):
"""Add a mirror to Spack.""" """Add a mirror to Spack."""
url = args.url url = url_util.format(args.url)
if url.startswith('/'):
url = 'file://' + url
mirrors = spack.config.get('mirrors', scope=args.scope) mirrors = spack.config.get('mirrors', scope=args.scope)
if not mirrors: if not mirrors:
mirrors = syaml_dict() mirrors = syaml_dict()
for name, u in mirrors.items(): if args.name in mirrors:
if name == args.name: tty.die("Mirror with name %s already exists." % args.name)
tty.die("Mirror with name %s already exists." % name)
if u == url:
tty.die("Mirror with url %s already exists." % url)
# should only be one item per mirror dict.
items = [(n, u) for n, u in mirrors.items()] items = [(n, u) for n, u in mirrors.items()]
items.insert(0, (args.name, url)) items.insert(0, (args.name, url))
@ -117,21 +125,86 @@ def mirror_remove(args):
old_value = mirrors.pop(name) old_value = mirrors.pop(name)
spack.config.set('mirrors', mirrors, scope=args.scope) spack.config.set('mirrors', mirrors, scope=args.scope)
tty.msg("Removed mirror %s with url %s" % (name, old_value))
debug_msg_url = "url %s"
debug_msg = ["Removed mirror %s with"]
values = [name]
try:
fetch_value = old_value['fetch']
push_value = old_value['push']
debug_msg.extend(("fetch", debug_msg_url, "and push", debug_msg_url))
values.extend((fetch_value, push_value))
except TypeError:
debug_msg.append(debug_msg_url)
values.append(old_value)
tty.debug(" ".join(debug_msg) % tuple(values))
tty.msg("Removed mirror %s." % name)
def mirror_set_url(args):
"""Change the URL of a mirror."""
url = url_util.format(args.url)
mirrors = spack.config.get('mirrors', scope=args.scope)
if not mirrors:
mirrors = syaml_dict()
if args.name not in mirrors:
tty.die("No mirror found with name %s." % args.name)
entry = mirrors[args.name]
try:
fetch_url = entry['fetch']
push_url = entry['push']
except TypeError:
fetch_url, push_url = entry, entry
changes_made = False
if args.push:
changes_made = changes_made or push_url != url
push_url = url
else:
changes_made = (
changes_made or fetch_url != push_url or push_url != url)
fetch_url, push_url = url, url
items = [
(
(n, u)
if n != args.name else (
(n, {"fetch": fetch_url, "push": push_url})
if fetch_url != push_url else (n, fetch_url)
)
)
for n, u in mirrors.items()
]
mirrors = syaml_dict(items)
spack.config.set('mirrors', mirrors, scope=args.scope)
if changes_made:
tty.msg(
"Changed%s url for mirror %s." %
((" (push)" if args.push else ""), args.name))
else:
tty.msg("Url already set for mirror %s." % args.name)
def mirror_list(args): def mirror_list(args):
"""Print out available mirrors to the console.""" """Print out available mirrors to the console."""
mirrors = spack.config.get('mirrors', scope=args.scope)
mirrors = spack.mirror.MirrorCollection(scope=args.scope)
if not mirrors: if not mirrors:
tty.msg("No mirrors configured.") tty.msg("No mirrors configured.")
return return
max_len = max(len(n) for n in mirrors.keys()) mirrors.display()
fmt = "%%-%ds%%s" % (max_len + 4)
for name in mirrors:
print(fmt % (name, mirrors[name]))
def _read_specs_from_file(filename): def _read_specs_from_file(filename):
@ -188,14 +261,13 @@ def mirror_create(args):
msg = 'Skipping {0} as it is an external spec.' msg = 'Skipping {0} as it is an external spec.'
tty.msg(msg.format(spec.cshort_spec)) tty.msg(msg.format(spec.cshort_spec))
# Default name for directory is spack-mirror-<DATESTAMP> mirror = spack.mirror.Mirror(
directory = args.directory args.directory or spack.config.get('config:source_cache'))
if not directory:
timestamp = datetime.now().strftime("%Y-%m-%d") directory = url_util.format(mirror.push_url)
directory = 'spack-mirror-' + timestamp
# Make sure nothing is in the way. # Make sure nothing is in the way.
existed = os.path.isdir(directory) existed = web_util.url_exists(directory)
# Actually do the work to create the mirror # Actually do the work to create the mirror
present, mirrored, error = spack.mirror.create( present, mirrored, error = spack.mirror.create(
@ -220,6 +292,7 @@ def mirror(parser, args):
'add': mirror_add, 'add': mirror_add,
'remove': mirror_remove, 'remove': mirror_remove,
'rm': mirror_remove, 'rm': mirror_remove,
'set-url': mirror_set_url,
'list': mirror_list} 'list': mirror_list}
if args.no_checksum: if args.no_checksum:

View file

@ -5,10 +5,8 @@
from __future__ import division, print_function from __future__ import division, print_function
from collections import defaultdict from collections import defaultdict
try:
from urllib.parse import urlparse import six.moves.urllib.parse as urllib_parse
except ImportError:
from urlparse import urlparse
import spack.fetch_strategy as fs import spack.fetch_strategy as fs
import spack.repo import spack.repo
@ -262,7 +260,7 @@ def add(self, fetcher):
self.checksums[algo] += 1 self.checksums[algo] += 1
# parse out the URL scheme (https/http/ftp/etc.) # parse out the URL scheme (https/http/ftp/etc.)
urlinfo = urlparse(fetcher.url) urlinfo = urllib_parse.urlparse(fetcher.url)
self.schemes[urlinfo.scheme] += 1 self.schemes[urlinfo.scheme] += 1
elif url_type == 'git': elif url_type == 'git':

View file

@ -23,6 +23,7 @@
Archive a source directory, e.g. for creating a mirror. Archive a source directory, e.g. for creating a mirror.
""" """
import os import os
import os.path
import sys import sys
import re import re
import shutil import shutil
@ -30,6 +31,7 @@
import xml.etree.ElementTree import xml.etree.ElementTree
from functools import wraps from functools import wraps
from six import string_types, with_metaclass from six import string_types, with_metaclass
import six.moves.urllib.parse as urllib_parse
import llnl.util.tty as tty import llnl.util.tty as tty
from llnl.util.filesystem import ( from llnl.util.filesystem import (
@ -39,6 +41,9 @@
import spack.error import spack.error
import spack.util.crypto as crypto import spack.util.crypto as crypto
import spack.util.pattern as pattern import spack.util.pattern as pattern
import spack.util.web as web_util
import spack.util.url as url_util
from spack.util.executable import which from spack.util.executable import which
from spack.util.string import comma_and, quote from spack.util.string import comma_and, quote
from spack.version import Version, ver from spack.version import Version, ver
@ -48,6 +53,17 @@
#: List of all fetch strategies, created by FetchStrategy metaclass. #: List of all fetch strategies, created by FetchStrategy metaclass.
all_strategies = [] all_strategies = []
CONTENT_TYPE_MISMATCH_WARNING_TEMPLATE = (
"The contents of {subject} look like {content_type}. Either the URL"
" you are trying to use does not exist or you have an internet gateway"
" issue. You can remove the bad archive using 'spack clean"
" <package>', then try again using the correct URL.")
def warn_content_type_mismatch(subject, content_type='HTML'):
tty.warn(CONTENT_TYPE_MISMATCH_WARNING_TEMPLATE.format(
subject=subject, content_type=content_type))
def _needs_stage(fun): def _needs_stage(fun):
"""Many methods on fetch strategies require a stage to be set """Many methods on fetch strategies require a stage to be set
@ -351,12 +367,7 @@ def fetch(self):
content_types = re.findall(r'Content-Type:[^\r\n]+', headers, content_types = re.findall(r'Content-Type:[^\r\n]+', headers,
flags=re.IGNORECASE) flags=re.IGNORECASE)
if content_types and 'text/html' in content_types[-1]: if content_types and 'text/html' in content_types[-1]:
msg = ("The contents of {0} look like HTML. Either the URL " warn_content_type_mismatch(self.archive_file or "the archive")
"you are trying to use does not exist or you have an "
"internet gateway issue. You can remove the bad archive "
"using 'spack clean <package>', then try again using "
"the correct URL.")
tty.warn(msg.format(self.archive_file or "the archive"))
if save_file: if save_file:
os.rename(partial_file, save_file) os.rename(partial_file, save_file)
@ -449,7 +460,10 @@ def archive(self, destination):
if not self.archive_file: if not self.archive_file:
raise NoArchiveFileError("Cannot call archive() before fetching.") raise NoArchiveFileError("Cannot call archive() before fetching.")
shutil.copyfile(self.archive_file, destination) web_util.push_to_url(
self.archive_file,
destination,
keep_original=True)
@_needs_stage @_needs_stage
def check(self): def check(self):
@ -1063,6 +1077,54 @@ def __str__(self):
return "[hg] %s" % self.url return "[hg] %s" % self.url
class S3FetchStrategy(URLFetchStrategy):
"""FetchStrategy that pulls from an S3 bucket."""
enabled = True
url_attr = 's3'
def __init__(self, *args, **kwargs):
try:
super(S3FetchStrategy, self).__init__(*args, **kwargs)
except ValueError:
if not kwargs.get('url'):
raise ValueError(
"S3FetchStrategy requires a url for fetching.")
@_needs_stage
def fetch(self):
if self.archive_file:
tty.msg("Already downloaded %s" % self.archive_file)
return
parsed_url = url_util.parse(self.url)
if parsed_url.scheme != 's3':
raise ValueError(
'S3FetchStrategy can only fetch from s3:// urls.')
tty.msg("Fetching %s" % self.url)
basename = os.path.basename(parsed_url.path)
with working_dir(self.stage.path):
_, headers, stream = web_util.read_from_url(self.url)
with open(basename, 'wb') as f:
shutil.copyfileobj(stream, f)
content_type = headers['Content-type']
if content_type == 'text/html':
warn_content_type_mismatch(self.archive_file or "the archive")
if self.stage.save_filename:
os.rename(
os.path.join(self.stage.path, basename),
self.stage.save_filename)
if not self.archive_file:
raise FailedDownloadError(self.url)
def from_url(url): def from_url(url):
"""Given a URL, find an appropriate fetch strategy for it. """Given a URL, find an appropriate fetch strategy for it.
Currently just gives you a URLFetchStrategy that uses curl. Currently just gives you a URLFetchStrategy that uses curl.
@ -1206,6 +1268,34 @@ def for_package_version(pkg, version):
raise InvalidArgsError(pkg, version, **args) raise InvalidArgsError(pkg, version, **args)
def from_url_scheme(url, *args, **kwargs):
"""Finds a suitable FetchStrategy by matching its url_attr with the scheme
in the given url."""
url = kwargs.get('url', url)
parsed_url = urllib_parse.urlparse(url, scheme='file')
scheme_mapping = (
kwargs.get('scheme_mapping') or
{
'file': 'url',
'http': 'url',
'https': 'url'
})
scheme = parsed_url.scheme
scheme = scheme_mapping.get(scheme, scheme)
for fetcher in all_strategies:
url_attr = getattr(fetcher, 'url_attr', None)
if url_attr and url_attr == scheme:
return fetcher(url, *args, **kwargs)
raise ValueError(
'No FetchStrategy found for url with scheme: "{SCHEME}"'.format(
SCHEME=parsed_url.scheme))
def from_list_url(pkg): def from_list_url(pkg):
"""If a package provides a URL which lists URLs for resources by """If a package provides a URL which lists URLs for resources by
version, this can can create a fetcher for a URL discovered for version, this can can create a fetcher for a URL discovered for

View file

@ -13,6 +13,18 @@
""" """
import sys import sys
import os import os
import os.path
import operator
import six
import ruamel.yaml.error as yaml_error
try:
from collections.abc import Mapping
except ImportError:
from collections import Mapping
import llnl.util.tty as tty import llnl.util.tty as tty
from llnl.util.filesystem import mkdirp from llnl.util.filesystem import mkdirp
@ -20,9 +32,205 @@
import spack.error import spack.error
import spack.url as url import spack.url as url
import spack.fetch_strategy as fs import spack.fetch_strategy as fs
from spack.spec import Spec import spack.util.spack_json as sjson
import spack.util.spack_yaml as syaml
import spack.util.url as url_util
import spack.spec
from spack.version import VersionList from spack.version import VersionList
from spack.util.compression import allowed_archive from spack.util.compression import allowed_archive
from spack.util.spack_yaml import syaml_dict
def _display_mirror_entry(size, name, url, type_=None):
if type_:
type_ = "".join((" (", type_, ")"))
else:
type_ = ""
print("%-*s%s%s" % (size + 4, name, url, type_))
class Mirror(object):
"""Represents a named location for storing source tarballs and binary
packages.
Mirrors have a fetch_url that indicate where and how artifacts are fetched
from them, and a push_url that indicate where and how artifacts are pushed
to them. These two URLs are usually the same.
"""
def __init__(self, fetch_url, push_url=None, name=None):
self._fetch_url = fetch_url
self._push_url = push_url
self._name = name
def to_json(self, stream=None):
return sjson.dump(self.to_dict(), stream)
def to_yaml(self, stream=None):
return syaml.dump(self.to_dict(), stream)
@staticmethod
def from_yaml(stream, name=None):
try:
data = syaml.load(stream)
return Mirror.from_dict(data, name)
except yaml_error.MarkedYAMLError as e:
raise syaml.SpackYAMLError("error parsing YAML spec:", str(e))
@staticmethod
def from_json(stream, name=None):
d = sjson.load(stream)
return Mirror.from_dict(d, name)
def to_dict(self):
if self._push_url is None:
return self._fetch_url
else:
return syaml_dict([
('fetch', self._fetch_url),
('push', self._push_url)])
@staticmethod
def from_dict(d, name=None):
if isinstance(d, six.string_types):
return Mirror(d, name=name)
else:
return Mirror(d['fetch'], d['push'], name)
def display(self, max_len=0):
if self._push_url is None:
_display_mirror_entry(max_len, self._name, self._fetch_url)
else:
_display_mirror_entry(
max_len, self._name, self._fetch_url, "fetch")
_display_mirror_entry(
max_len, self._name, self._push_url, "push")
def __str__(self):
name = self._name
if name is None:
name = ''
else:
name = ' "%s"' % name
if self._push_url is None:
return "[Mirror%s (%s)]" % (name, self._fetch_url)
return "[Mirror%s (fetch: %s, push: %s)]" % (
name, self._fetch_url, self._push_url)
def __repr__(self):
return ''.join((
'Mirror(',
', '.join(
'%s=%s' % (k, repr(v))
for k, v in (
('fetch_url', self._fetch_url),
('push_url', self._push_url),
('name', self._name))
if k == 'fetch_url' or v),
')'
))
@property
def name(self):
return self._name or "<unnamed>"
@property
def fetch_url(self):
return self._fetch_url
@fetch_url.setter
def fetch_url(self, url):
self._fetch_url = url
self._normalize()
@property
def push_url(self):
if self._push_url is None:
return self._fetch_url
return self._push_url
@push_url.setter
def push_url(self, url):
self._push_url = url
self._normalize()
def _normalize(self):
if self._push_url is not None and self._push_url == self._fetch_url:
self._push_url = None
class MirrorCollection(Mapping):
"""A mapping of mirror names to mirrors."""
def __init__(self, mirrors=None, scope=None):
self._mirrors = dict(
(name, Mirror.from_dict(mirror, name))
for name, mirror in (
mirrors.items() if mirrors is not None else
spack.config.get('mirrors', scope=scope).items()))
def to_json(self, stream=None):
return sjson.dump(self.to_dict(True), stream)
def to_yaml(self, stream=None):
return syaml.dump(self.to_dict(True), stream)
@staticmethod
def from_yaml(stream, name=None):
try:
data = syaml.load(stream)
return MirrorCollection(data)
except yaml_error.MarkedYAMLError as e:
raise syaml.SpackYAMLError("error parsing YAML spec:", str(e))
@staticmethod
def from_json(stream, name=None):
d = sjson.load(stream)
return MirrorCollection(d)
def to_dict(self, recursive=False):
return syaml_dict(sorted(
(
(k, (v.to_dict() if recursive else v))
for (k, v) in self._mirrors.items()
), key=operator.itemgetter(0)
))
@staticmethod
def from_dict(d):
return MirrorCollection(d)
def __getitem__(self, item):
return self._mirrors[item]
def display(self):
max_len = max(len(mirror.name) for mirror in self._mirrors.values())
for mirror in self._mirrors.values():
mirror.display(max_len)
def lookup(self, name_or_url):
"""Looks up and returns a Mirror.
If this MirrorCollection contains a named Mirror under the name
[name_or_url], then that mirror is returned. Otherwise, [name_or_url]
is assumed to be a mirror URL, and an anonymous mirror with the given
URL is returned.
"""
result = self.get(name_or_url)
if result is None:
result = Mirror(fetch_url=name_or_url)
return result
def __iter__(self):
return iter(self._mirrors)
def __len__(self):
return len(self._mirrors)
def mirror_archive_filename(spec, fetcher, resource_id=None): def mirror_archive_filename(spec, fetcher, resource_id=None):
@ -114,7 +322,7 @@ def get_matching_versions(specs, **kwargs):
# Generate only versions that satisfy the spec. # Generate only versions that satisfy the spec.
if spec.concrete or v.satisfies(spec.versions): if spec.concrete or v.satisfies(spec.versions):
s = Spec(pkg.name) s = spack.spec.Spec(pkg.name)
s.versions = VersionList([v]) s.versions = VersionList([v])
s.variants = spec.variants.copy() s.variants = spec.variants.copy()
# This is needed to avoid hanging references during the # This is needed to avoid hanging references during the
@ -166,12 +374,17 @@ def create(path, specs, **kwargs):
it creates specs for those versions. If the version satisfies any spec it creates specs for those versions. If the version satisfies any spec
in the specs list, it is downloaded and added to the mirror. in the specs list, it is downloaded and added to the mirror.
""" """
parsed = url_util.parse(path)
mirror_root = url_util.local_file_path(parsed)
# Make sure nothing is in the way. # Make sure nothing is in the way.
if os.path.isfile(path): if mirror_root and os.path.isfile(mirror_root):
raise MirrorError("%s already exists and is a file." % path) raise MirrorError("%s already exists and is a file." % mirror_root)
# automatically spec-ify anything in the specs array. # automatically spec-ify anything in the specs array.
specs = [s if isinstance(s, Spec) else Spec(s) for s in specs] specs = [
s if isinstance(s, spack.spec.Spec) else spack.spec.Spec(s)
for s in specs]
# Get concrete specs for each matching version of these specs. # Get concrete specs for each matching version of these specs.
version_specs = get_matching_versions( version_specs = get_matching_versions(
@ -180,8 +393,7 @@ def create(path, specs, **kwargs):
s.concretize() s.concretize()
# Get the absolute path of the root before we start jumping around. # Get the absolute path of the root before we start jumping around.
mirror_root = os.path.abspath(path) if mirror_root and not os.path.isdir(mirror_root):
if not os.path.isdir(mirror_root):
try: try:
mkdirp(mirror_root) mkdirp(mirror_root)
except OSError as e: except OSError as e:
@ -195,12 +407,12 @@ def create(path, specs, **kwargs):
'error': [] 'error': []
} }
mirror_cache = spack.caches.MirrorCache(mirror_root) mirror_cache = spack.caches.MirrorCache(parsed)
try: try:
spack.caches.mirror_cache = mirror_cache spack.caches.mirror_cache = mirror_cache
# Iterate through packages and download all safe tarballs for each # Iterate through packages and download all safe tarballs for each
for spec in version_specs: for spec in version_specs:
add_single_spec(spec, mirror_root, categories, **kwargs) add_single_spec(spec, parsed, categories, **kwargs)
finally: finally:
spack.caches.mirror_cache = None spack.caches.mirror_cache = None
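A usage sketch of the new `MirrorCollection.lookup()` behavior, matching how the `buildcache` command resolves its `-d` argument elsewhere in this diff (the name/URL below is hypothetical):

```python
import spack.mirror
import spack.util.url as url_util

# A configured mirror name resolves to its Mirror entry; any other string is
# treated as an anonymous mirror whose fetch/push URL is the string itself.
mirror = spack.mirror.MirrorCollection().lookup("my-mirror-or-s3-url")
push_url = url_util.format(mirror.push_url)
```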

View file

@ -0,0 +1,92 @@
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
from io import BufferedReader
import six.moves.urllib.response as urllib_response
import six.moves.urllib.request as urllib_request
import six.moves.urllib.error as urllib_error
import spack.util.s3 as s3_util
import spack.util.url as url_util
import spack.util.web as web_util
# NOTE(opadron): Workaround issue in boto where its StreamingBody
# implementation is missing several APIs expected from IOBase. These missing
# APIs prevent the streams returned by boto from being passed as-are along to
# urllib.
#
# https://github.com/boto/botocore/issues/879
# https://github.com/python/cpython/pull/3249
class WrapStream(BufferedReader):
def __init__(self, raw):
raw.readable = lambda: True
raw.writable = lambda: False
raw.seekable = lambda: False
raw.closed = False
raw.flush = lambda: None
super(WrapStream, self).__init__(raw)
def detach(self):
self.raw = None
def read(self, *args, **kwargs):
return self.raw.read(*args, **kwargs)
def __getattr__(self, key):
return getattr(self.raw, key)
def _s3_open(url):
parsed = url_util.parse(url)
s3 = s3_util.create_s3_session(parsed)
bucket = parsed.netloc
key = parsed.path
if key.startswith('/'):
key = key[1:]
obj = s3.get_object(Bucket=bucket, Key=key)
# NOTE(opadron): Apply workaround here (see above)
stream = WrapStream(obj['Body'])
headers = web_util.standardize_header_names(
obj['ResponseMetadata']['HTTPHeaders'])
return url, headers, stream
class UrllibS3Handler(urllib_request.HTTPSHandler):
def s3_open(self, req):
orig_url = req.get_full_url()
from botocore.exceptions import ClientError
try:
url, headers, stream = _s3_open(orig_url)
return urllib_response.addinfourl(stream, headers, url)
except ClientError as err:
# if no such [KEY], but [KEY]/index.html exists,
# return that, instead.
if err.response['Error']['Code'] == 'NoSuchKey':
try:
_, headers, stream = _s3_open(
url_util.join(orig_url, 'index.html'))
return urllib_response.addinfourl(
stream, headers, orig_url)
except ClientError as err2:
if err.response['Error']['Code'] == 'NoSuchKey':
# raise original error
raise urllib_error.URLError(err)
raise urllib_error.URLError(err2)
raise urllib_error.URLError(err)
S3OpenerDirector = urllib_request.build_opener(UrllibS3Handler())
open = S3OpenerDirector.open
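For context on how this handler gets invoked: `build_opener()` registers any handler method named `<scheme>_open`, so defining `s3_open` above is what routes `s3://` URLs through this code. A minimal, self-contained illustration using the standard library (not Spack code; the bucket/key are hypothetical):

```python
import urllib.request as urllib_request

class PrintingS3Handler(urllib_request.BaseHandler):
    def s3_open(self, req):
        # build_opener() notices the "s3_open" method name and registers this
        # handler for the "s3" scheme; urllib routes s3:// requests here.
        print("would fetch", req.get_full_url())

opener = urllib_request.build_opener(PrintingS3Handler())
# opener.open("s3://my-bucket/some/key")  # would call PrintingS3Handler.s3_open
```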

View file

@ -17,7 +17,19 @@
'default': {}, 'default': {},
'additionalProperties': False, 'additionalProperties': False,
'patternProperties': { 'patternProperties': {
r'\w[\w-]*': {'type': 'string'}, r'\w[\w-]*': {
'anyOf': [
{'type': 'string'},
{
'type': 'object',
'required': ['fetch', 'push'],
'properties': {
'fetch': {'type': 'string'},
'push': {'type': 'string'}
}
}
]
},
}, },
}, },
} }

View file

@ -12,7 +12,6 @@
import getpass import getpass
from six import string_types from six import string_types
from six import iteritems from six import iteritems
from six.moves.urllib.parse import urljoin
import llnl.util.tty as tty import llnl.util.tty as tty
from llnl.util.filesystem import mkdirp, can_access, install, install_tree from llnl.util.filesystem import mkdirp, can_access, install, install_tree
@ -20,12 +19,16 @@
import spack.paths import spack.paths
import spack.caches import spack.caches
import spack.cmd
import spack.config import spack.config
import spack.error import spack.error
import spack.mirror
import spack.util.lock import spack.util.lock
import spack.fetch_strategy as fs import spack.fetch_strategy as fs
import spack.util.pattern as pattern import spack.util.pattern as pattern
import spack.util.path as sup import spack.util.path as sup
import spack.util.url as url_util
from spack.util.crypto import prefix_bits, bit_length from spack.util.crypto import prefix_bits, bit_length
@ -252,7 +255,7 @@ def __init__(
# TODO: fetch/stage coupling needs to be reworked -- the logic # TODO: fetch/stage coupling needs to be reworked -- the logic
# TODO: here is convoluted and not modular enough. # TODO: here is convoluted and not modular enough.
if isinstance(url_or_fetch_strategy, string_types): if isinstance(url_or_fetch_strategy, string_types):
self.fetcher = fs.from_url(url_or_fetch_strategy) self.fetcher = fs.from_url_scheme(url_or_fetch_strategy)
elif isinstance(url_or_fetch_strategy, fs.FetchStrategy): elif isinstance(url_or_fetch_strategy, fs.FetchStrategy):
self.fetcher = url_or_fetch_strategy self.fetcher = url_or_fetch_strategy
else: else:
@ -397,16 +400,9 @@ def fetch(self, mirror_only=False):
# TODO: CompositeFetchStrategy here. # TODO: CompositeFetchStrategy here.
self.skip_checksum_for_mirror = True self.skip_checksum_for_mirror = True
if self.mirror_path: if self.mirror_path:
mirrors = spack.config.get('mirrors') urls = [
url_util.join(mirror.fetch_url, self.mirror_path)
# Join URLs of mirror roots with mirror paths. Because for mirror in spack.mirror.MirrorCollection().values()]
# urljoin() will strip everything past the final '/' in
# the root, so we add a '/' if it is not present.
mir_roots = [
sup.substitute_path_variables(root) if root.endswith(os.sep)
else sup.substitute_path_variables(root) + os.sep
for root in mirrors.values()]
urls = [urljoin(root, self.mirror_path) for root in mir_roots]
# If this archive is normally fetched from a tarball URL, # If this archive is normally fetched from a tarball URL,
# then use the same digest. `spack mirror` ensures that # then use the same digest. `spack mirror` ensures that
@ -425,9 +421,12 @@ def fetch(self, mirror_only=False):
# Add URL strategies for all the mirrors with the digest # Add URL strategies for all the mirrors with the digest
for url in urls: for url in urls:
fetchers.insert( fetchers.append(fs.from_url_scheme(
0, fs.URLFetchStrategy( url, digest, expand=expand, extension=extension))
url, digest, expand=expand, extension=extension)) # fetchers.insert(
# 0, fs.URLFetchStrategy(
# url, digest, expand=expand, extension=extension))
if self.default_fetcher.cachable: if self.default_fetcher.cachable:
fetchers.insert( fetchers.insert(
0, spack.caches.fetch_cache.fetcher( 0, spack.caches.fetch_cache.fetcher(
@ -708,6 +707,91 @@ def purge():
remove_linked_tree(stage_path) remove_linked_tree(stage_path)
def get_checksums_for_versions(
url_dict, name, first_stage_function=None, keep_stage=False):
"""Fetches and checksums archives from URLs.
This function is called by both ``spack checksum`` and ``spack
create``. The ``first_stage_function`` argument allows the caller to
inspect the first downloaded archive, e.g., to determine the build
system.
Args:
url_dict (dict): A dictionary of the form: version -> URL
name (str): The name of the package
first_stage_function (callable): function that takes a Stage and a URL;
this is run on the stage of the first URL downloaded
keep_stage (bool): whether to keep staging area when command completes
Returns:
(str): A multi-line string containing versions and corresponding hashes
"""
sorted_versions = sorted(url_dict.keys(), reverse=True)
# Find length of longest string in the list for padding
max_len = max(len(str(v)) for v in sorted_versions)
num_ver = len(sorted_versions)
tty.msg("Found {0} version{1} of {2}:".format(
num_ver, '' if num_ver == 1 else 's', name),
"",
*spack.cmd.elide_list(
["{0:{1}} {2}".format(str(v), max_len, url_dict[v])
for v in sorted_versions]))
tty.msg('')
archives_to_fetch = tty.get_number(
"How many would you like to checksum?", default=1, abort='q')
if not archives_to_fetch:
tty.die("Aborted.")
versions = sorted_versions[:archives_to_fetch]
urls = [url_dict[v] for v in versions]
tty.msg("Downloading...")
version_hashes = []
i = 0
for url, version in zip(urls, versions):
try:
with Stage(url, keep=keep_stage) as stage:
# Fetch the archive
stage.fetch()
if i == 0 and first_stage_function:
# Only run first_stage_function the first time,
# no need to run it every time
first_stage_function(stage, url)
# Checksum the archive and add it to the list
version_hashes.append((version, spack.util.crypto.checksum(
hashlib.sha256, stage.archive_file)))
i += 1
except FailedDownloadError:
tty.msg("Failed to fetch {0}".format(url))
except Exception as e:
tty.msg("Something failed on {0}, skipping.".format(url),
" ({0})".format(e))
if not version_hashes:
tty.die("Could not fetch any versions for {0}".format(name))
# Find length of longest string in the list for padding
max_len = max(len(str(v)) for v, h in version_hashes)
# Generate the version directives to put in a package.py
version_lines = "\n".join([
" version('{0}', {1}sha256='{2}')".format(
v, ' ' * (max_len - len(str(v))), h) for v, h in version_hashes
])
num_hash = len(version_hashes)
tty.msg("Checksummed {0} version{1} of {2}".format(
num_hash, '' if num_hash == 1 else 's', name))
return version_lines
class StageError(spack.error.SpackError): class StageError(spack.error.SpackError):
""""Superclass for all errors encountered during staging.""" """"Superclass for all errors encountered during staging."""
@ -720,5 +804,9 @@ class RestageError(StageError):
""""Error encountered during restaging.""" """"Error encountered during restaging."""
class VersionFetchError(StageError):
"""Raised when we can't determine a URL to fetch a package."""
# Keep this in namespace for convenience # Keep this in namespace for convenience
FailedDownloadError = fs.FailedDownloadError FailedDownloadError = fs.FailedDownloadError

View file

@ -53,6 +53,8 @@ def mock_pkg_git_repo(tmpdir_factory):
# initial commit with mock packages # initial commit with mock packages
git('add', '.') git('add', '.')
git('config', 'user.email', 'testing@spack.io')
git('config', 'user.name', 'Spack Testing')
git('commit', '-m', 'initial mock repo commit') git('commit', '-m', 'initial mock repo commit')
# add commit with pkg-a, pkg-b, pkg-c packages # add commit with pkg-a, pkg-b, pkg-c packages
@ -595,6 +595,7 @@ def test_bad_config_section(mock_config):
spack.config.get('foobar') spack.config.get('foobar')
@pytest.mark.skipif(os.getuid() == 0, reason='user is root')
def test_bad_command_line_scopes(tmpdir, mock_config): def test_bad_command_line_scopes(tmpdir, mock_config):
cfg = spack.config.Configuration() cfg = spack.config.Configuration()
@ -546,6 +546,7 @@ def test_write_lock_timeout_with_multiple_readers_3_2_ranges(lock_path):
timeout_write(lock_path, 5, 1)) timeout_write(lock_path, 5, 1))
@pytest.mark.skipif(os.getuid() == 0, reason='user is root')
def test_read_lock_on_read_only_lockfile(lock_dir, lock_path): def test_read_lock_on_read_only_lockfile(lock_dir, lock_path):
"""read-only directory, read-only lockfile.""" """read-only directory, read-only lockfile."""
touch(lock_path) touch(lock_path)
@ -573,6 +574,7 @@ def test_read_lock_read_only_dir_writable_lockfile(lock_dir, lock_path):
pass pass
@pytest.mark.skipif(os.getuid() == 0, reason='user is root')
def test_read_lock_no_lockfile(lock_dir, lock_path): def test_read_lock_no_lockfile(lock_dir, lock_path):
"""read-only directory, no lockfile (so can't create).""" """read-only directory, no lockfile (so can't create)."""
with read_only(lock_dir): with read_only(lock_dir):
@ -653,6 +653,7 @@ def test_source_path_available(self, mock_stage_archive):
assert source_path.endswith(spack.stage._source_path_subdir) assert source_path.endswith(spack.stage._source_path_subdir)
assert not os.path.exists(source_path) assert not os.path.exists(source_path)
@pytest.mark.skipif(os.getuid() == 0, reason='user is root')
def test_first_accessible_path(self, tmpdir): def test_first_accessible_path(self, tmpdir):
"""Test _first_accessible_path names.""" """Test _first_accessible_path names."""
spack_dir = tmpdir.join('paths') spack_dir = tmpdir.join('paths')
@ -783,6 +784,7 @@ def test_resolve_paths(self):
assert spack.stage._resolve_paths(paths) == res_paths assert spack.stage._resolve_paths(paths) == res_paths
@pytest.mark.skipif(os.getuid() == 0, reason='user is root')
def test_get_stage_root_bad_path(self, clear_stage_root): def test_get_stage_root_bad_path(self, clear_stage_root):
"""Ensure an invalid stage path root raises a StageError.""" """Ensure an invalid stage path root raises a StageError."""
with spack.config.override('config:build_stage', '/no/such/path'): with spack.config.override('config:build_stage', '/no/such/path'):
@ -0,0 +1,44 @@
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import os
import six.moves.urllib.parse as urllib_parse
import spack.config
import spack.util.url as url_util
def create_s3_session(url):
url = url_util.parse(url)
if url.scheme != 's3':
raise ValueError(
'Can not create S3 session from URL with scheme: {SCHEME}'.format(
SCHEME=url.scheme))
# NOTE(opadron): import boto and friends as late as possible. We don't
# want to require boto as a dependency unless the user actually wants to
# access S3 mirrors.
from boto3 import Session
session = Session()
s3_client_args = {"use_ssl": spack.config.get('config:verify_ssl')}
endpoint_url = os.environ.get('S3_ENDPOINT_URL')
if endpoint_url:
if urllib_parse.urlparse(endpoint_url, scheme=None).scheme is None:
endpoint_url = '://'.join(('https', endpoint_url))
s3_client_args['endpoint_url'] = endpoint_url
# if no access credentials provided above, then access anonymously
if not session.get_credentials():
from botocore import UNSIGNED
from botocore.client import Config
s3_client_args["config"] = Config(signature_version=UNSIGNED)
return session.client('s3', **s3_client_args)
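
A rough usage sketch (bucket and prefix names are made up): create_s3_session() returns a low-level boto3 client, so callers can use any client API directly, for example to list a page of keys under a mirror prefix.

import spack.util.s3 as s3_util
import spack.util.url as url_util

mirror = url_util.parse('s3://my-spack-mirror/build_cache')
client = s3_util.create_s3_session(mirror)

resp = client.list_objects_v2(
    Bucket=mirror.netloc,             # 'my-spack-mirror'
    Prefix=mirror.path.lstrip('/'),   # 'build_cache'
    MaxKeys=10)
for entry in resp.get('Contents', []):
    print(entry['Key'])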
lib/spack/spack/util/url.py (new file, 175 lines)
@ -0,0 +1,175 @@
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""
Utility functions for parsing, formatting, and manipulating URLs.
"""
import itertools
import os.path
from six import string_types
import six.moves.urllib.parse as urllib_parse
import spack.util.path
def _split_all(path):
"""Split path into its atomic components.
Returns the shortest list, L, of strings such that os.path.join(*L) == path
and os.path.split(element) == ('', element) for every element in L except
possibly the first. This first element may possibly have the value of '/',
or some other OS-dependent path root.
"""
result = []
a = path
old_a = None
while a != old_a:
(old_a, (a, b)) = a, os.path.split(a)
if a or b:
result.insert(0, b or '/')
return result
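
For example, the behavior described above works out to the following (assuming a POSIX-style os.path):

_split_all('/a/b/c')   # -> ['/', 'a', 'b', 'c']
_split_all('a/b/c')    # -> ['a', 'b', 'c']
_split_all('')         # -> []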
def local_file_path(url):
"""Get a local file path from a url.
If url is a file:// URL, return the absolute path to the local
file or directory referenced by it. Otherwise, return None.
"""
if isinstance(url, string_types):
url = parse(url)
if url.scheme == 'file':
return url.path
return None
def parse(url, scheme='file'):
"""Parse a mirror url.
For file:// URLs, the netloc and path components are concatenated and
passed through spack.util.path.canonicalize_path().
Otherwise, the returned value is the same as urllib's urlparse() with
allow_fragments=False.
"""
url_obj = (
urllib_parse.urlparse(url, scheme=scheme, allow_fragments=False)
if isinstance(url, string_types) else url)
(scheme, netloc, path, params, query, _) = url_obj
scheme = (scheme or 'file').lower()
if scheme == 'file':
path = spack.util.path.canonicalize_path(netloc + path)
while path.startswith('//'):
path = path[1:]
netloc = ''
return urllib_parse.ParseResult(scheme=scheme,
netloc=netloc,
path=path,
params=params,
query=query,
fragment=None)
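
Illustrative results, as I read the implementation above (shown for a POSIX system; the bucket and paths are made up):

parse('s3://my-bucket/build_cache')
# -> ParseResult(scheme='s3', netloc='my-bucket', path='/build_cache',
#                params='', query='', fragment=None)

parse('/tmp/mirror')   # no scheme, so it is treated as a file:// URL
# -> ParseResult(scheme='file', netloc='', path='/tmp/mirror',
#                params='', query='', fragment=None)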
def format(parsed_url):
"""Format a URL string
Returns a canonicalized format of the given URL as a string.
"""
if isinstance(parsed_url, string_types):
parsed_url = parse(parsed_url)
return parsed_url.geturl()
def join(base_url, path, *extra, **kwargs):
"""Joins a base URL with one or more local URL path components
If resolve_href is True, treat the base URL as though it were the locator
of a web page, and the remaining URL path components as though they formed
a relative URL to be resolved against it (i.e.: as in os.path.join(...)).
The result is an absolute URL to the resource to which a user's browser
would navigate if they clicked on a link with an "href" attribute equal to
the relative URL.
If resolve_href is False (default), then the URL path components are joined
as in os.path.join().
Examples:
base_url = 's3://bucket/index.html'
body = fetch_body(base_url)
link = get_href(body)  # link == '../other-bucket/document.txt'
# wrong - link is a local URL that needs to be resolved against base_url
spack.util.url.join(base_url, link)
's3://bucket/other-bucket/document.txt'
# correct - resolve local URL against base_url
spack.util.url.join(base_url, link, resolve_href=True)
's3://other-bucket/document.txt'
prefix = 'https://mirror.spack.io/build_cache'
# wrong - prefix is just a URL prefix
spack.util.url.join(prefix, 'my-package', resolve_href=True)
'https://mirror.spack.io/my-package'
# correct - simply append additional URL path components
spack.util.url.join(prefix, 'my-package', resolve_href=False) # default
'https://mirror.spack.io/build_cache/my-package'
"""
base_url = parse(base_url)
resolve_href = kwargs.get('resolve_href', False)
(scheme, netloc, base_path, params, query, _) = base_url
scheme = scheme.lower()
path_tokens = [
part for part in itertools.chain(
_split_all(path),
itertools.chain.from_iterable(
_split_all(extra_path) for extra_path in extra))
if part and part != '/']
base_path_args = ['/fake-root']
if scheme == 's3':
if netloc:
base_path_args.append(netloc)
if base_path.startswith('/'):
base_path = base_path[1:]
base_path_args.append(base_path)
if resolve_href:
new_base_path, _ = os.path.split(os.path.join(*base_path_args))
base_path_args = [new_base_path]
base_path_args.extend(path_tokens)
base_path = os.path.relpath(os.path.join(*base_path_args), '/fake-root')
if scheme == 's3':
path_tokens = [
part for part in _split_all(base_path)
if part and part != '/']
if path_tokens:
netloc = path_tokens.pop(0)
base_path = os.path.join('', *path_tokens)
return format(urllib_parse.ParseResult(scheme=scheme,
netloc=netloc,
path=base_path,
params=params,
query=query,
fragment=None))
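
As a usage sketch beyond the docstring examples, this is how a mirror URL and the build-cache layout might be combined (the mirror name is made up; the result was traced by hand from the code above, so treat it as indicative):

mirror_url = 's3://my-spack-mirror'
join(mirror_url, 'build_cache', 'index.html')
# -> 's3://my-spack-mirror/build_cache/index.html'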
@ -5,16 +5,21 @@
from __future__ import print_function from __future__ import print_function
import codecs
import errno
import re import re
import os import os
import os.path
import shutil
import ssl import ssl
import sys import sys
import traceback import traceback
import hashlib
from itertools import product
import six
from six.moves.urllib.request import urlopen, Request from six.moves.urllib.request import urlopen, Request
from six.moves.urllib.error import URLError from six.moves.urllib.error import URLError
from six.moves.urllib.parse import urljoin
import multiprocessing.pool import multiprocessing.pool
try: try:
@ -28,20 +33,47 @@
class HTMLParseError(Exception): class HTMLParseError(Exception):
pass pass
from llnl.util.filesystem import mkdirp
import llnl.util.tty as tty import llnl.util.tty as tty
import spack.config
import spack.cmd import spack.cmd
import spack.url import spack.config
import spack.stage
import spack.error import spack.error
import spack.url
import spack.util.crypto import spack.util.crypto
import spack.util.s3 as s3_util
import spack.util.url as url_util
from spack.util.compression import ALLOWED_ARCHIVE_TYPES from spack.util.compression import ALLOWED_ARCHIVE_TYPES
# Timeout in seconds for web requests # Timeout in seconds for web requests
_timeout = 10 _timeout = 10
# See docstring for standardize_header_names()
_separators = ('', ' ', '_', '-')
HTTP_HEADER_NAME_ALIASES = {
"Accept-ranges": set(
''.join((A, 'ccept', sep, R, 'anges'))
for A, sep, R in product('Aa', _separators, 'Rr')),
"Content-length": set(
''.join((C, 'ontent', sep, L, 'ength'))
for C, sep, L in product('Cc', _separators, 'Ll')),
"Content-type": set(
''.join((C, 'ontent', sep, T, 'ype'))
for C, sep, T in product('Cc', _separators, 'Tt')),
"Date": set(('Date', 'date')),
"Last-modified": set(
''.join((L, 'ast', sep, M, 'odified'))
for L, sep, M in product('Ll', _separators, 'Mm')),
"Server": set(('Server', 'server'))
}
class LinkParser(HTMLParser): class LinkParser(HTMLParser):
"""This parser just takes an HTML page and strips out the hrefs on the """This parser just takes an HTML page and strips out the hrefs on the
@ -59,7 +91,7 @@ def handle_starttag(self, tag, attrs):
class NonDaemonProcess(multiprocessing.Process): class NonDaemonProcess(multiprocessing.Process):
"""Process tha allows sub-processes, so pools can have sub-pools.""" """Process that allows sub-processes, so pools can have sub-pools."""
@property @property
def daemon(self): def daemon(self):
return False return False
@ -86,25 +118,53 @@ def __init__(self, *args, **kwargs):
super(NonDaemonPool, self).__init__(*args, **kwargs) super(NonDaemonPool, self).__init__(*args, **kwargs)
(The old module-private _read_from_url() and its thin read_from_url() wrapper are replaced by a single read_from_url(); the inline Python-version check moves into the module-level __UNABLE_TO_VERIFY_SSL flag, and the SSL warning into warn_no_ssl_cert_checking(), added further below.)

def uses_ssl(parsed_url):
    if parsed_url.scheme == 'https':
        return True

    if parsed_url.scheme == 's3':
        endpoint_url = os.environ.get('S3_ENDPOINT_URL')
        if not endpoint_url:
            return True

        if url_util.parse(endpoint_url, scheme='https').scheme == 'https':
            return True

    return False


__UNABLE_TO_VERIFY_SSL = (
    lambda pyver: (
        (pyver < (2, 7, 9)) or
        ((3,) < pyver < (3, 4, 3))
    ))(sys.version_info)


def read_from_url(url, accept_content_type=None):
    url = url_util.parse(url)
    context = None

    verify_ssl = spack.config.get('config:verify_ssl')

    # Don't even bother with a context unless the URL scheme is one that uses
    # SSL certs.
    if uses_ssl(url):
        if verify_ssl:
            if __UNABLE_TO_VERIFY_SSL:
                # User wants SSL verification, but it cannot be provided.
                warn_no_ssl_cert_checking()
            else:
                # User wants SSL verification, and it *can* be provided.
                context = ssl.create_default_context()
        else:
            # User has explicitly indicated that they do not want SSL
            # verification.
            context = ssl._create_unverified_context()

    req = Request(url_util.format(url))
    content_type = None
    is_web_url = url.scheme in ('http', 'https')

    if accept_content_type and is_web_url:
        # Make a HEAD request first to check the content type.  This lets
        # us ignore tarballs and gigantic files.
        # It would be nice to do this with the HTTP Accept header to avoid
@ -113,29 +173,179 @@ def _read_from_url(url, accept_content_type=None):
        req.get_method = lambda: "HEAD"
        resp = _urlopen(req, timeout=_timeout, context=context)

        content_type = resp.headers.get('Content-type')

    # Do the real GET request when we know it's just HTML.
    req.get_method = lambda: "GET"
    response = _urlopen(req, timeout=_timeout, context=context)

    if accept_content_type and not is_web_url:
        content_type = response.headers.get('Content-type')

    reject_content_type = (
        accept_content_type and (
            content_type is None or
            not content_type.startswith(accept_content_type)))

    if reject_content_type:
        tty.debug("ignoring page {0}{1}{2}".format(
            url_util.format(url),
            " with content type " if content_type is not None else "",
            content_type or ""))

        return None, None, None

    return response.geturl(), response.headers, response


def warn_no_ssl_cert_checking():
    tty.warn("Spack will not check SSL certificates. You need to update "
             "your Python to enable certificate verification.")
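
A hedged sketch of the new calling convention: read_from_url() now returns the final URL, the response headers, and a file-like response object, so callers decode the body themselves (the mirror URL below is made up):

import codecs
import spack.util.web as web_util

url, headers, response = web_util.read_from_url(
    's3://my-spack-mirror/build_cache/index.html', 'text/html')
if response is not None:
    body = codecs.getreader('utf-8')(response).read()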
def push_to_url(local_path, remote_path, **kwargs):
keep_original = kwargs.get('keep_original', True)
local_url = url_util.parse(local_path)
local_file_path = url_util.local_file_path(local_url)
if local_file_path is None:
raise ValueError('local path must be a file:// url')
remote_url = url_util.parse(remote_path)
verify_ssl = spack.config.get('config:verify_ssl')
if __UNABLE_TO_VERIFY_SSL and verify_ssl and uses_ssl(remote_url):
warn_no_ssl_cert_checking()
remote_file_path = url_util.local_file_path(remote_url)
if remote_file_path is not None:
mkdirp(os.path.dirname(remote_file_path))
if keep_original:
shutil.copy(local_file_path, remote_file_path)
else:
try:
os.rename(local_file_path, remote_file_path)
except OSError as e:
if e.errno == errno.EXDEV:
# NOTE(opadron): The above move failed because it crosses
# filesystem boundaries. Copy the file (plus original
# metadata), and then delete the original. This operation
# needs to be done in separate steps.
shutil.copy2(local_file_path, remote_file_path)
os.remove(local_file_path)
elif remote_url.scheme == 's3':
extra_args = kwargs.get('extra_args', {})
remote_path = remote_url.path
while remote_path.startswith('/'):
remote_path = remote_path[1:]
s3 = s3_util.create_s3_session(remote_url)
s3.upload_file(local_file_path, remote_url.netloc,
remote_path, ExtraArgs=extra_args)
if not keep_original:
os.remove(local_file_path)
else:
raise NotImplementedError(
'Unrecognized URL scheme: {SCHEME}'.format(
SCHEME=remote_url.scheme))
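
A usage sketch for push_to_url(); the paths, bucket, and ACL value are illustrative, and extra_args is forwarded to boto3's upload_file:

import spack.util.web as web_util

# Copy a locally staged tarball to an S3 mirror...
web_util.push_to_url(
    '/tmp/stage/mypkg-1.2.1.tar.gz',
    's3://my-spack-mirror/build_cache/mypkg-1.2.1.tar.gz',
    keep_original=True,
    extra_args={'ACL': 'public-read'})

# ...or move it into a local file:// mirror.
web_util.push_to_url(
    '/tmp/stage/mypkg-1.2.1.tar.gz',
    'file:///srv/mirror/build_cache/mypkg-1.2.1.tar.gz',
    keep_original=False)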
def url_exists(url):
url = url_util.parse(url)
local_path = url_util.local_file_path(url)
if local_path:
return os.path.exists(local_path)
if url.scheme == 's3':
s3 = s3_util.create_s3_session(url)
from botocore.exceptions import ClientError
try:
s3.get_object(Bucket=url.netloc, Key=url.path)
return True
except ClientError as err:
if err.response['Error']['Code'] == 'NoSuchKey':
return False
raise err
# otherwise, just try to "read" from the URL, and assume that *any*
# non-throwing response contains the resource represented by the URL
try:
read_from_url(url)
return True
except URLError:
return False
def remove_url(url):
url = url_util.parse(url)
local_path = url_util.local_file_path(url)
if local_path:
os.remove(local_path)
return
if url.scheme == 's3':
s3 = s3_util.create_s3_session(url)
s3.delete_object(Bucket=url.netloc, Key=url.path)  # bucket comes from the URL's netloc, as elsewhere in this module
return
# Don't even try for other URL schemes.
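
For example (the object key is made up), a caller might probe for and then delete a build-cache entry:

import spack.util.web as web_util

spec_url = 's3://my-spack-mirror/build_cache/mypkg-1.2.1.spec.yaml'
if web_util.url_exists(spec_url):
    web_util.remove_url(spec_url)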
def _list_s3_objects(client, url, num_entries, start_after=None):
list_args = dict(
Bucket=url.netloc,
Prefix=url.path,
MaxKeys=num_entries)
if start_after is not None:
list_args['StartAfter'] = start_after
result = client.list_objects_v2(**list_args)
last_key = None
if result['IsTruncated']:
last_key = result['Contents'][-1]['Key']
iter = (key for key in
(
os.path.relpath(entry['Key'], url.path)
for entry in result['Contents']
)
if key != '.')
return iter, last_key
def _iter_s3_prefix(client, url, num_entries=1024):
key = None
while True:
contents, key = _list_s3_objects(
client, url, num_entries, start_after=key)
for x in contents:
yield x
if not key:
break
def list_url(url):
url = url_util.parse(url)
local_path = url_util.local_file_path(url)
if local_path:
return os.listdir(local_path)
if url.scheme == 's3':
s3 = s3_util.create_s3_session(url)
return list(set(
key.split('/', 1)[0]
for key in _iter_s3_prefix(s3, url)))
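
A sketch of listing a mirror's build cache; for S3 URLs this returns the distinct first path components found under the prefix (the mirror name is made up):

import spack.util.web as web_util

for name in web_util.list_url('s3://my-spack-mirror/build_cache'):
    print(name)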
def _spider(url, visited, root, depth, max_depth, raise_on_error): def _spider(url, visited, root, depth, max_depth, raise_on_error):
@ -154,16 +364,12 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error):
pages = {} # dict from page URL -> text content. pages = {} # dict from page URL -> text content.
links = set() # set of all links seen on visited pages. links = set() # set of all links seen on visited pages.
# root may end with index.html -- chop that off.
if root.endswith('/index.html'):
root = re.sub('/index.html$', '', root)
try: try:
response_url, page = _read_from_url(url, 'text/html') response_url, _, response = read_from_url(url, 'text/html')
if not response_url or not response:
if not response_url or not page:
return pages, links return pages, links
page = codecs.getreader('utf-8')(response).read()
pages[response_url] = page pages[response_url] = page
# Parse out the links in the page # Parse out the links in the page
@ -173,8 +379,10 @@ def _spider(url, visited, root, depth, max_depth, raise_on_error):
while link_parser.links: while link_parser.links:
raw_link = link_parser.links.pop() raw_link = link_parser.links.pop()
abs_link = urljoin(response_url, raw_link.strip()) abs_link = url_util.join(
response_url,
raw_link.strip(),
resolve_href=True)
links.add(abs_link) links.add(abs_link)
# Skip stuff that looks like an archive # Skip stuff that looks like an archive
@ -243,16 +451,28 @@ def _spider_wrapper(args):
return _spider(*args) return _spider(*args)
(Previously _urlopen(*args, **kwargs) always delegated straight to urlopen.)

def _urlopen(req, *args, **kwargs):
    """Wrapper for compatibility with old versions of Python."""
    url = req
    try:
        url = url.get_full_url()
    except AttributeError:
        pass

    # We don't pass 'context' parameter because it was only introduced starting
    # with versions 2.7.9 and 3.4.3 of Python.
    if 'context' in kwargs:
        del kwargs['context']

    opener = urlopen
    if url_util.parse(url).scheme == 's3':
        import spack.s3_handler
        opener = spack.s3_handler.open

    return opener(req, *args, **kwargs)
def spider(root_url, depth=0): def spider(root, depth=0):
"""Gets web pages from a root URL. """Gets web pages from a root URL.
If depth is specified (e.g., depth=2), then this will also follow If depth is specified (e.g., depth=2), then this will also follow
@ -262,7 +482,9 @@ def spider(root_url, depth=0):
performance over a sequential fetch. performance over a sequential fetch.
""" """
pages, links = _spider(root_url, set(), root_url, 0, depth, False)
root = url_util.parse(root)
pages, links = _spider(root, set(), root, 0, depth, False)
return pages, links return pages, links
@ -356,99 +578,112 @@ def find_versions_of_archive(archive_urls, list_url=None, list_depth=0):
return versions return versions
(The get_checksums_for_versions() function is removed from this module; it now lives with the staging code shown earlier in this diff. In its place, a helper for normalizing HTTP header names is added:)

def standardize_header_names(headers):
    """Replace certain header names with standardized spellings.

    Standardizes the spellings of the following header names:
    - Accept-ranges
    - Content-length
    - Content-type
    - Date
    - Last-modified
    - Server

    Every name considered is translated to one of the above names if the only
    difference between the two is how the first letters of each word are
    capitalized; whether words are separated; or, if separated, whether they
    are so by a dash (-), underscore (_), or space ( ).  Header names that
    cannot be mapped as described above are returned unaltered.

    For example: The standard spelling of "Content-length" would be substituted
    for any of the following names:
    - Content-length
    - content_length
    - contentlength
    - content_Length
    - contentLength
    - content Length

    ... and any other header name, such as "Content-encoding", would not be
    altered, regardless of spelling.

    If headers is a string, then it (or an appropriate substitute) is returned.

    If headers is a non-empty tuple, headers[0] is a string, and there exists a
    standardized spelling for header[0] that differs from it, then a new tuple
    is returned.  This tuple has the same elements as headers, except the first
    element is the standardized spelling for headers[0].

    If headers is a sequence, then a new list is considered, where each element
    is its corresponding element in headers, but mapped as above if a string or
    tuple.  This new list is returned if at least one of its elements differs
    from its corresponding element in headers.

    If headers is a mapping, then a new dict is considered, where the key in
    each item is the key of its corresponding item in headers, mapped as above
    if a string or tuple.  The value is taken from the corresponding item.  If
    the keys of multiple items in headers map to the same key after being
    standardized, then the value for the resulting item is undefined.  The new
    dict is returned if at least one of its items has a key that differs from
    that of their corresponding item in headers, or if the keys of multiple
    items in headers map to the same key after being standardized.

    In all other cases headers is returned unaltered.
    """
    if isinstance(headers, six.string_types):
        for standardized_spelling, other_spellings in (
                HTTP_HEADER_NAME_ALIASES.items()):
            if headers in other_spellings:
                if headers == standardized_spelling:
                    return headers
                return standardized_spelling
        return headers

    if isinstance(headers, tuple):
        if not headers:
            return headers
        old = headers[0]
        if isinstance(old, six.string_types):
            new = standardize_header_names(old)
            if old is not new:
                return (new,) + headers[1:]
        return headers

    try:
        changed = False
        new_dict = {}
        for key, value in headers.items():
            if isinstance(key, (tuple, six.string_types)):
                old_key, key = key, standardize_header_names(key)
                changed = changed or key is not old_key

            new_dict[key] = value

        return new_dict if changed else headers
    except (AttributeError, TypeError, ValueError):
        pass

    try:
        changed = False
        new_list = []
        for item in headers:
            if isinstance(item, (tuple, six.string_types)):
                old_item, item = item, standardize_header_names(item)
                changed = changed or item is not old_item

            new_list.append(item)

        return new_list if changed else headers
    except TypeError:
        pass

    return headers
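
Illustrative behavior, following the docstring above:

standardize_header_names('content_length')
# -> 'Content-length'

standardize_header_names({'content-type': 'text/html', 'X-Custom': '1'})
# -> {'Content-type': 'text/html', 'X-Custom': '1'}

standardize_header_names('Content-Encoding')
# -> 'Content-Encoding' (no alias defined, so it is returned unaltered)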
class SpackWebError(spack.error.SpackError): class SpackWebError(spack.error.SpackError):
"""Superclass for Spack web spidering errors.""" """Superclass for Spack web spidering errors."""
class VersionFetchError(SpackWebError):
"""Raised when we can't determine a URL to fetch a package."""
class NoNetworkConnectionError(SpackWebError): class NoNetworkConnectionError(SpackWebError):
"""Raised when an operation can't get an internet connection.""" """Raised when an operation can't get an internet connection."""
def __init__(self, message, url): def __init__(self, message, url):