Follow up/11117 fixes and testing (#13607)

* fix docstring in generate_package_index() referring to "public" keys as "signing" keys

* use explicit kwargs in push_to_url()

* simplify url_util.parse() per tgamblin's suggestion

* replace standardize_header_names() with the much simpler get_header()

* add some basic tests

* update s3_fetch tests

* update S3 list code to strip leading slashes from prefix

* correct minor warning regression introduced in #11117

* add more tests

* flake8 fixes

* add capsys fixture to mirror_crud test

* add get_header() tests

* use get_header() in more places

* incorporate review comments
Omar Padron 2019-12-09 17:23:33 -05:00 committed by GitHub
parent da9a562182
commit 0592c58030
10 changed files with 270 additions and 156 deletions


@@ -272,7 +272,7 @@ def generate_package_index(cache_prefix):
Creates (or replaces) the "index.html" page at the location given in
cache_prefix. This page contains a link for each binary package (*.yaml)
and signing key (*.key) under cache_prefix.
and public key (*.key) under cache_prefix.
"""
tmpdir = tempfile.mkdtemp()
try:
@@ -679,7 +679,7 @@ def get_specs(force=False):
return _cached_specs
if not spack.mirror.MirrorCollection():
tty.warn("No Spack mirrors are currently configured")
tty.debug("No Spack mirrors are currently configured")
return {}
urls = set()


@@ -1142,7 +1142,7 @@ def fetch(self):
with open(basename, 'wb') as f:
shutil.copyfileobj(stream, f)
content_type = headers['Content-type']
content_type = web_util.get_header(headers, 'Content-type')
if content_type == 'text/html':
warn_content_type_mismatch(self.archive_file or "the archive")


@@ -11,7 +11,6 @@
import spack.util.s3 as s3_util
import spack.util.url as url_util
import spack.util.web as web_util
# NOTE(opadron): Workaround issue in boto where its StreamingBody
@@ -54,8 +53,7 @@ def _s3_open(url):
# NOTE(opadron): Apply workaround here (see above)
stream = WrapStream(obj['Body'])
headers = web_util.standardize_header_names(
obj['ResponseMetadata']['HTTPHeaders'])
headers = obj['ResponseMetadata']['HTTPHeaders']
return url, headers, stream
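
A minimal sketch of why handing back the raw boto headers still works for callers (the header names and values below are illustrative; botocore generally reports HTTP header names in lower case, which the fuzzy get_header() lookup added in this commit tolerates):

import spack.util.web as web_util

# illustrative response headers as boto/botocore would report them (lower case)
headers = {'content-type': 'application/x-gzip', 'content-length': '1024'}
# callers can still ask for the canonical spelling; get_header() matches despite
# the different casing, so the dict no longer needs to be pre-normalized
assert web_util.get_header(headers, 'Content-type') == 'application/x-gzip'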


@@ -0,0 +1,41 @@
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import pytest
import os
import os.path
import spack.spec
import spack.binary_distribution
install = spack.main.SpackCommand('install')
def test_build_tarball_overwrite(
install_mockery, mock_fetch, monkeypatch, tmpdir):
with tmpdir.as_cwd():
spec = spack.spec.Spec('trivial-install-test-package').concretized()
install(str(spec))
# Runs fine the first time, throws the second time
spack.binary_distribution.build_tarball(spec, '.', unsigned=True)
with pytest.raises(spack.binary_distribution.NoOverwriteException):
spack.binary_distribution.build_tarball(spec, '.', unsigned=True)
# Should work fine with force=True
spack.binary_distribution.build_tarball(
spec, '.', force=True, unsigned=True)
# Remove the tarball and try again.
# This must *also* throw, because of the existing .spec.yaml file
os.remove(os.path.join(
spack.binary_distribution.build_cache_prefix('.'),
spack.binary_distribution.tarball_directory_name(spec),
spack.binary_distribution.tarball_name(spec, '.spack')))
with pytest.raises(spack.binary_distribution.NoOverwriteException):
spack.binary_distribution.build_tarball(spec, '.', unsigned=True)


@@ -6,7 +6,7 @@
import pytest
import os
from spack.main import SpackCommand
from spack.main import SpackCommand, SpackCommandError
import spack.environment as ev
import spack.config
@@ -16,6 +16,25 @@
concretize = SpackCommand('concretize')
@pytest.fixture
def tmp_scope():
"""Creates a temporary configuration scope"""
base_name = 'internal-testing-scope'
current_overrides = set(
x.name for x in
spack.config.config.matching_scopes(r'^{0}'.format(base_name)))
num_overrides = 0
scope_name = base_name
while scope_name in current_overrides:
scope_name = '{0}{1}'.format(base_name, num_overrides)
num_overrides += 1
with spack.config.override(spack.config.InternalConfigScope(scope_name)):
yield scope_name
@pytest.mark.disable_clean_stage_check
@pytest.mark.regression('8083')
def test_regression_8083(tmpdir, capfd, mock_packages, mock_fetch, config):
@@ -45,3 +64,49 @@ def test_mirror_from_env(tmpdir, mock_packages, mock_fetch, config,
mirror_res = os.listdir(os.path.join(mirror_dir, spec.name))
expected = ['%s.tar.gz' % spec.format('{name}-{version}')]
assert mirror_res == expected
def test_mirror_crud(tmp_scope, capsys):
with capsys.disabled():
mirror('add', '--scope', tmp_scope, 'mirror', 'http://spack.io')
output = mirror('remove', '--scope', tmp_scope, 'mirror')
assert 'Removed mirror' in output
mirror('add', '--scope', tmp_scope, 'mirror', 'http://spack.io')
# no-op
output = mirror('set-url', '--scope', tmp_scope,
'mirror', 'http://spack.io')
assert 'Url already set' in output
output = mirror('set-url', '--scope', tmp_scope,
'--push', 'mirror', 's3://spack-public')
assert 'Changed (push) url' in output
# no-op
output = mirror('set-url', '--scope', tmp_scope,
'--push', 'mirror', 's3://spack-public')
assert 'Url already set' in output
output = mirror('remove', '--scope', tmp_scope, 'mirror')
assert 'Removed mirror' in output
output = mirror('list', '--scope', tmp_scope)
assert 'No mirrors configured' in output
def test_mirror_nonexisting(tmp_scope):
with pytest.raises(SpackCommandError):
mirror('remove', '--scope', tmp_scope, 'not-a-mirror')
with pytest.raises(SpackCommandError):
mirror('set-url', '--scope', tmp_scope,
'not-a-mirror', 'http://spack.io')
def test_mirror_name_collision(tmp_scope):
mirror('add', '--scope', tmp_scope, 'first', '1')
with pytest.raises(SpackCommandError):
mirror('add', '--scope', tmp_scope, 'first', '1')


@@ -0,0 +1,17 @@
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import pytest
from spack.fetch_strategy import from_url_scheme
def test_fetchstrategy_bad_url_scheme():
"""Ensure that trying to make a fetch strategy from a URL with an
unsupported scheme fails as expected."""
with pytest.raises(ValueError):
fetcher = from_url_scheme( # noqa: F841
'bogus-scheme://example.com/a/b/c')


@@ -0,0 +1,29 @@
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import pytest
import spack.fetch_strategy as spack_fs
import spack.stage as spack_stage
def test_s3fetchstrategy_sans_url():
"""Ensure constructor with no URL fails."""
with pytest.raises(ValueError):
spack_fs.S3FetchStrategy(None)
def test_s3fetchstrategy_bad_url(tmpdir):
"""Ensure fetch with bad URL fails as expected."""
testpath = str(tmpdir)
fetcher = spack_fs.S3FetchStrategy(url='file:///does-not-exist')
assert fetcher is not None
with spack_stage.Stage(fetcher, path=testpath) as stage:
assert stage is not None
assert fetcher.archive_file is None
with pytest.raises(spack_fs.FetchError):
fetcher.fetch()


@@ -5,9 +5,12 @@
"""Tests for web.py."""
import os
import pytest
from ordereddict_backport import OrderedDict
import spack.paths
from spack.util.web import spider, find_versions_of_archive
import spack.util.web as web_util
from spack.version import ver
@@ -23,7 +26,7 @@
def test_spider_0():
pages, links = spider(root, depth=0)
pages, links = web_util.spider(root, depth=0)
assert root in pages
assert page_1 not in pages
@@ -41,7 +44,7 @@ def test_spider_0():
def test_spider_1():
pages, links = spider(root, depth=1)
pages, links = web_util.spider(root, depth=1)
assert root in pages
assert page_1 in pages
@@ -60,7 +63,7 @@ def test_spider_1():
def test_spider_2():
pages, links = spider(root, depth=2)
pages, links = web_util.spider(root, depth=2)
assert root in pages
assert page_1 in pages
@@ -81,7 +84,7 @@ def test_spider_2():
def test_spider_3():
pages, links = spider(root, depth=3)
pages, links = web_util.spider(root, depth=3)
assert root in pages
assert page_1 in pages
@@ -104,31 +107,36 @@ def test_spider_3():
def test_find_versions_of_archive_0():
versions = find_versions_of_archive(root_tarball, root, list_depth=0)
versions = web_util.find_versions_of_archive(
root_tarball, root, list_depth=0)
assert ver('0.0.0') in versions
def test_find_versions_of_archive_1():
versions = find_versions_of_archive(root_tarball, root, list_depth=1)
versions = web_util.find_versions_of_archive(
root_tarball, root, list_depth=1)
assert ver('0.0.0') in versions
assert ver('1.0.0') in versions
def test_find_versions_of_archive_2():
versions = find_versions_of_archive(root_tarball, root, list_depth=2)
versions = web_util.find_versions_of_archive(
root_tarball, root, list_depth=2)
assert ver('0.0.0') in versions
assert ver('1.0.0') in versions
assert ver('2.0.0') in versions
def test_find_exotic_versions_of_archive_2():
versions = find_versions_of_archive(root_tarball, root, list_depth=2)
versions = web_util.find_versions_of_archive(
root_tarball, root, list_depth=2)
# up for grabs to make this better.
assert ver('2.0.0b2') in versions
def test_find_versions_of_archive_3():
versions = find_versions_of_archive(root_tarball, root, list_depth=3)
versions = web_util.find_versions_of_archive(
root_tarball, root, list_depth=3)
assert ver('0.0.0') in versions
assert ver('1.0.0') in versions
assert ver('2.0.0') in versions
@@ -137,7 +145,49 @@ def test_find_versions_of_archive_3():
def test_find_exotic_versions_of_archive_3():
versions = find_versions_of_archive(root_tarball, root, list_depth=3)
versions = web_util.find_versions_of_archive(
root_tarball, root, list_depth=3)
assert ver('2.0.0b2') in versions
assert ver('3.0a1') in versions
assert ver('4.5-rc5') in versions
def test_get_header():
headers = {
'Content-type': 'text/plain'
}
# looking up headers should just work like a plain dict
# lookup when there is an entry with the right key
assert(web_util.get_header(headers, 'Content-type') == 'text/plain')
# looking up headers should still work if there is a fuzzy match
assert(web_util.get_header(headers, 'contentType') == 'text/plain')
# ...unless there is an exact match for the "fuzzy" spelling.
headers['contentType'] = 'text/html'
assert(web_util.get_header(headers, 'contentType') == 'text/html')
# If the lookup has to fall back to fuzzy matching and there is more than
# one fuzzy match, the result depends on the internal ordering of the
# given mapping
headers = OrderedDict()
headers['Content-type'] = 'text/plain'
headers['contentType'] = 'text/html'
assert(web_util.get_header(headers, 'CONTENT_TYPE') == 'text/plain')
del headers['Content-type']
assert(web_util.get_header(headers, 'CONTENT_TYPE') == 'text/html')
# Same as above, but different ordering
headers = OrderedDict()
headers['contentType'] = 'text/html'
headers['Content-type'] = 'text/plain'
assert(web_util.get_header(headers, 'CONTENT_TYPE') == 'text/html')
del headers['contentType']
assert(web_util.get_header(headers, 'CONTENT_TYPE') == 'text/plain')
# If there isn't even a fuzzy match, raise KeyError
with pytest.raises(KeyError):
web_util.get_header(headers, 'ContentLength')


@@ -9,6 +9,7 @@
import itertools
import os.path
import re
from six import string_types
import six.moves.urllib.parse as urllib_parse
@@ -69,8 +70,7 @@ def parse(url, scheme='file'):
if scheme == 'file':
path = spack.util.path.canonicalize_path(netloc + path)
while path.startswith('//'):
path = path[1:]
path = re.sub(r'^/+', '/', path)
netloc = ''
return urllib_parse.ParseResult(scheme=scheme,
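
A standalone illustration of the new one-liner (the path below is made up): the regex collapses any run of leading slashes to a single slash, which is what the old character-at-a-time while loop achieved.

import re

path = '///tmp/mirror/build_cache'
# a run of leading slashes collapses to exactly one
assert re.sub(r'^/+', '/', path) == '/tmp/mirror/build_cache'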


@@ -15,9 +15,6 @@
import sys
import traceback
from itertools import product
import six
from six.moves.urllib.request import urlopen, Request
from six.moves.urllib.error import URLError
import multiprocessing.pool
@@ -50,30 +47,6 @@ class HTMLParseError(Exception):
# Timeout in seconds for web requests
_timeout = 10
# See docstring for standardize_header_names()
_separators = ('', ' ', '_', '-')
HTTP_HEADER_NAME_ALIASES = {
"Accept-ranges": set(
''.join((A, 'ccept', sep, R, 'anges'))
for A, sep, R in product('Aa', _separators, 'Rr')),
"Content-length": set(
''.join((C, 'ontent', sep, L, 'ength'))
for C, sep, L in product('Cc', _separators, 'Ll')),
"Content-type": set(
''.join((C, 'ontent', sep, T, 'ype'))
for C, sep, T in product('Cc', _separators, 'Tt')),
"Date": set(('Date', 'date')),
"Last-modified": set(
''.join((L, 'ast', sep, M, 'odified'))
for L, sep, M in product('Ll', _separators, 'Mm')),
"Server": set(('Server', 'server'))
}
class LinkParser(HTMLParser):
"""This parser just takes an HTML page and strips out the hrefs on the
@@ -173,7 +146,7 @@ def read_from_url(url, accept_content_type=None):
req.get_method = lambda: "HEAD"
resp = _urlopen(req, timeout=_timeout, context=context)
content_type = resp.headers.get('Content-type')
content_type = get_header(resp.headers, 'Content-type')
# Do the real GET request when we know it's just HTML.
req.get_method = lambda: "GET"
@@ -185,7 +158,7 @@
ERROR=str(err)))
if accept_content_type and not is_web_url:
content_type = response.headers.get('Content-type')
content_type = get_header(response.headers, 'Content-type')
reject_content_type = (
accept_content_type and (
@@ -208,9 +181,8 @@ def warn_no_ssl_cert_checking():
"your Python to enable certificate verification.")
def push_to_url(local_file_path, remote_path, **kwargs):
keep_original = kwargs.get('keep_original', True)
def push_to_url(
local_file_path, remote_path, keep_original=True, extra_args=None):
remote_url = url_util.parse(remote_path)
verify_ssl = spack.config.get('config:verify_ssl')
@@ -235,7 +207,8 @@ def push_to_url(local_file_path, remote_path, **kwargs):
os.remove(local_file_path)
elif remote_url.scheme == 's3':
extra_args = kwargs.get('extra_args', {})
if extra_args is None:
extra_args = {}
remote_path = remote_url.path
while remote_path.startswith('/'):
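
With the keyword arguments now explicit, a call site might look like the following sketch (the file path, bucket name, and ACL value are hypothetical, and it assumes extra_args is forwarded to the underlying S3 upload):

import spack.util.web as web_util

web_util.push_to_url(
    '/tmp/foo.spack', 's3://my-mirror/build_cache/foo.spack',
    keep_original=False,                   # delete the local copy after the push
    extra_args={'ACL': 'public-read'})     # assumed to be passed through to boto3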
@@ -296,10 +269,25 @@ def remove_url(url):
# Don't even try for other URL schemes.
def _list_s3_objects(client, url, num_entries, start_after=None):
def _iter_s3_contents(contents, prefix):
for entry in contents:
key = entry['Key']
if not key.startswith('/'):
key = '/' + key
key = os.path.relpath(key, prefix)
if key == '.':
continue
yield key
def _list_s3_objects(client, bucket, prefix, num_entries, start_after=None):
list_args = dict(
Bucket=url.netloc,
Prefix=url.path,
Bucket=bucket,
Prefix=prefix[1:],
MaxKeys=num_entries)
if start_after is not None:
@@ -311,21 +299,19 @@ def _list_s3_objects(client, url, num_entries, start_after=None):
if result['IsTruncated']:
last_key = result['Contents'][-1]['Key']
iter = (key for key in
(
os.path.relpath(entry['Key'], url.path)
for entry in result['Contents']
)
if key != '.')
iter = _iter_s3_contents(result['Contents'], prefix)
return iter, last_key
def _iter_s3_prefix(client, url, num_entries=1024):
key = None
bucket = url.netloc
prefix = re.sub(r'^/*', '/', url.path)
while True:
contents, key = _list_s3_objects(
client, url, num_entries, start_after=key)
client, bucket, prefix, num_entries, start_after=key)
for x in contents:
yield x
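
A worked example of the new prefix handling (the values are illustrative): the URL path is normalized to exactly one leading slash, that slash is dropped for the S3 Prefix argument, and each returned key is reported relative to the prefix.

import os.path
import re

url_path = 'build_cache'                   # urllib may or may not include a leading '/'
prefix = re.sub(r'^/*', '/', url_path)     # -> '/build_cache'
assert prefix[1:] == 'build_cache'         # what is passed as the S3 Prefix

key = 'build_cache/linux/foo.spack'        # S3 keys come back without a leading slash
assert os.path.relpath('/' + key, prefix) == 'linux/foo.spack'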
@@ -577,106 +563,34 @@ def find_versions_of_archive(archive_urls, list_url=None, list_depth=0):
return versions
def standardize_header_names(headers):
"""Replace certain header names with standardized spellings.
def get_header(headers, header_name):
"""Looks up a dict of headers for the given header value.
Standardizes the spellings of the following header names:
- Accept-ranges
- Content-length
- Content-type
- Date
- Last-modified
- Server
Looks up a dict of headers, [headers], for a header value given by
[header_name]. Returns headers[header_name] if header_name is in headers.
Otherwise, the first fuzzy match is returned, if any.
Every name considered is translated to one of the above names if the only
difference between the two is how the first letters of each word are
capitalized; whether words are separated; or, if separated, whether they
are so by a dash (-), underscore (_), or space ( ). Header names that
cannot be mapped as described above are returned unaltered.
This fuzzy matching is performed by discarding word separators and
capitalization, so that for example, "Content-length", "content_length",
"conTENtLength", etc., all match. In the case of multiple fuzzy-matches,
the returned value is the "first" such match given the underlying mapping's
ordering, or unspecified if no such ordering is defined.
For example: The standard spelling of "Content-length" would be substituted
for any of the following names:
- Content-length
- content_length
- contentlength
- content_Length
- contentLength
- content Length
... and any other header name, such as "Content-encoding", would not be
altered, regardless of spelling.
If headers is a string, then it (or an appropriate substitute) is returned.
If headers is a non-empty tuple, headers[0] is a string, and there exists a
standardized spelling for header[0] that differs from it, then a new tuple
is returned. This tuple has the same elements as headers, except the first
element is the standardized spelling for headers[0].
If headers is a sequence, then a new list is considered, where each element
is its corresponding element in headers, but mapped as above if a string or
tuple. This new list is returned if at least one of its elements differ
from their corresponding element in headers.
If headers is a mapping, then a new dict is considered, where the key in
each item is the key of its corresponding item in headers, mapped as above
if a string or tuple. The value is taken from the corresponding item. If
the keys of multiple items in headers map to the same key after being
standardized, then the value for the resulting item is undefined. The new
dict is returned if at least one of its items has a key that differs from
that of their corresponding item in headers, or if the keys of multiple
items in headers map to the same key after being standardized.
In all other cases headers is returned unaltered.
If header_name is not in headers, and no such fuzzy match exists, then a
KeyError is raised.
"""
if isinstance(headers, six.string_types):
for standardized_spelling, other_spellings in (
HTTP_HEADER_NAME_ALIASES.items()):
if headers in other_spellings:
if headers == standardized_spelling:
return headers
return standardized_spelling
return headers
if isinstance(headers, tuple):
if not headers:
return headers
old = headers[0]
if isinstance(old, six.string_types):
new = standardize_header_names(old)
if old is not new:
return (new,) + headers[1:]
return headers
def unfuzz(header):
return re.sub(r'[ _-]', '', header).lower()
try:
changed = False
new_dict = {}
for key, value in headers.items():
if isinstance(key, (tuple, six.string_types)):
old_key, key = key, standardize_header_names(key)
changed = changed or key is not old_key
new_dict[key] = value
return new_dict if changed else headers
except (AttributeError, TypeError, ValueError):
pass
try:
changed = False
new_list = []
for item in headers:
if isinstance(item, (tuple, six.string_types)):
old_item, item = item, standardize_header_names(item)
changed = changed or item is not old_item
new_list.append(item)
return new_list if changed else headers
except TypeError:
pass
return headers
return headers[header_name]
except KeyError:
unfuzzed_header_name = unfuzz(header_name)
for header, value in headers.items():
if unfuzz(header) == unfuzzed_header_name:
return value
raise
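
For reference, a short usage sketch of the fuzzy lookup (the header names and values are illustrative):

headers = {'content_length': '512', 'Content-type': 'text/plain'}

assert get_header(headers, 'Content-type') == 'text/plain'   # exact key match
assert get_header(headers, 'Content-length') == '512'        # fuzzy match across case/separators
# a header with no exact or fuzzy match raises KeyError:
#   get_header(headers, 'Last-modified')
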
class SpackWebError(spack.error.SpackError):