Follow up/11117 fixes and testing (#13607)

* fix docstring in generate_package_index() referring to "public" keys as "signing" keys

* use explicit kwargs in push_to_url()

* simplify url_util.parse() per tgamblin's suggestion

* replace standardize_header_names() with the much simpler get_header()

* add some basic tests

* update s3_fetch tests

* update S3 list code to strip leading slashes from prefix

* correct minor warning regression introduced in #11117

* add more tests

* flake8 fixes

* add capsys fixture to mirror_crud test

* add get_header() tests

* use get_header() in more places

* incorporate review comments
Omar Padron 2019-12-09 17:23:33 -05:00 committed by GitHub
parent da9a562182
commit 0592c58030
10 changed files with 270 additions and 156 deletions

View file

@@ -272,7 +272,7 @@ def generate_package_index(cache_prefix):
     Creates (or replaces) the "index.html" page at the location given in
     cache_prefix. This page contains a link for each binary package (*.yaml)
-    and signing key (*.key) under cache_prefix.
+    and public key (*.key) under cache_prefix.
     """
     tmpdir = tempfile.mkdtemp()
     try:
@@ -679,7 +679,7 @@ def get_specs(force=False):
         return _cached_specs

     if not spack.mirror.MirrorCollection():
-        tty.warn("No Spack mirrors are currently configured")
+        tty.debug("No Spack mirrors are currently configured")
         return {}

     urls = set()

View file

@@ -1142,7 +1142,7 @@ def fetch(self):
             with open(basename, 'wb') as f:
                 shutil.copyfileobj(stream, f)

-            content_type = headers['Content-type']
+            content_type = web_util.get_header(headers, 'Content-type')

         if content_type == 'text/html':
             warn_content_type_mismatch(self.archive_file or "the archive")

View file

@@ -11,7 +11,6 @@
 import spack.util.s3 as s3_util
 import spack.util.url as url_util
-import spack.util.web as web_util


 # NOTE(opadron): Workaround issue in boto where its StreamingBody
@@ -54,8 +53,7 @@ def _s3_open(url):
     # NOTE(opadron): Apply workaround here (see above)
     stream = WrapStream(obj['Body'])
-    headers = web_util.standardize_header_names(
-        obj['ResponseMetadata']['HTTPHeaders'])
+    headers = obj['ResponseMetadata']['HTTPHeaders']

     return url, headers, stream
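Since _s3_open() now hands back the raw HTTPHeaders mapping untouched, callers are expected to tolerate whatever capitalization the S3 endpoint uses. A minimal sketch of that pattern, with made-up header names and values:

    import spack.util.web as web_util

    # Raw headers roughly as a botocore response might report them (example values).
    headers = {'x-amz-request-id': 'ABC123', 'content-type': 'text/html'}

    # get_header() still finds the value despite the lowercase spelling.
    content_type = web_util.get_header(headers, 'Content-type')  # -> 'text/html'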

View file

@@ -0,0 +1,41 @@
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

import os
import os.path

import pytest

import spack.main
import spack.spec
import spack.binary_distribution

install = spack.main.SpackCommand('install')


def test_build_tarball_overwrite(
        install_mockery, mock_fetch, monkeypatch, tmpdir):
    with tmpdir.as_cwd():
        spec = spack.spec.Spec('trivial-install-test-package').concretized()
        install(str(spec))

        # Runs fine the first time, throws the second time
        spack.binary_distribution.build_tarball(spec, '.', unsigned=True)
        with pytest.raises(spack.binary_distribution.NoOverwriteException):
            spack.binary_distribution.build_tarball(spec, '.', unsigned=True)

        # Should work fine with force=True
        spack.binary_distribution.build_tarball(
            spec, '.', force=True, unsigned=True)

        # Remove the tarball and try again.
        # This must *also* throw, because of the existing .spec.yaml file
        os.remove(os.path.join(
            spack.binary_distribution.build_cache_prefix('.'),
            spack.binary_distribution.tarball_directory_name(spec),
            spack.binary_distribution.tarball_name(spec, '.spack')))

        with pytest.raises(spack.binary_distribution.NoOverwriteException):
            spack.binary_distribution.build_tarball(spec, '.', unsigned=True)

View file

@@ -6,7 +6,7 @@
 import pytest
 import os

-from spack.main import SpackCommand
+from spack.main import SpackCommand, SpackCommandError
 import spack.environment as ev

 import spack.config
@@ -16,6 +16,25 @@
 concretize = SpackCommand('concretize')


+@pytest.fixture
+def tmp_scope():
+    """Creates a temporary configuration scope"""
+    base_name = 'internal-testing-scope'
+    current_overrides = set(
+        x.name for x in
+        spack.config.config.matching_scopes(r'^{0}'.format(base_name)))
+
+    num_overrides = 0
+    scope_name = base_name
+    while scope_name in current_overrides:
+        scope_name = '{0}{1}'.format(base_name, num_overrides)
+        num_overrides += 1
+
+    with spack.config.override(spack.config.InternalConfigScope(scope_name)):
+        yield scope_name
+
+
 @pytest.mark.disable_clean_stage_check
 @pytest.mark.regression('8083')
 def test_regression_8083(tmpdir, capfd, mock_packages, mock_fetch, config):
@@ -45,3 +64,49 @@ def test_mirror_from_env(tmpdir, mock_packages, mock_fetch, config,
     mirror_res = os.listdir(os.path.join(mirror_dir, spec.name))
     expected = ['%s.tar.gz' % spec.format('{name}-{version}')]
     assert mirror_res == expected
+
+
+def test_mirror_crud(tmp_scope, capsys):
+    with capsys.disabled():
+        mirror('add', '--scope', tmp_scope, 'mirror', 'http://spack.io')
+
+        output = mirror('remove', '--scope', tmp_scope, 'mirror')
+        assert 'Removed mirror' in output
+
+        mirror('add', '--scope', tmp_scope, 'mirror', 'http://spack.io')
+
+        # no-op
+        output = mirror('set-url', '--scope', tmp_scope,
+                        'mirror', 'http://spack.io')
+        assert 'Url already set' in output
+
+        output = mirror('set-url', '--scope', tmp_scope,
+                        '--push', 'mirror', 's3://spack-public')
+        assert 'Changed (push) url' in output
+
+        # no-op
+        output = mirror('set-url', '--scope', tmp_scope,
+                        '--push', 'mirror', 's3://spack-public')
+        assert 'Url already set' in output
+
+        output = mirror('remove', '--scope', tmp_scope, 'mirror')
+        assert 'Removed mirror' in output
+
+        output = mirror('list', '--scope', tmp_scope)
+        assert 'No mirrors configured' in output
+
+
+def test_mirror_nonexisting(tmp_scope):
+    with pytest.raises(SpackCommandError):
+        mirror('remove', '--scope', tmp_scope, 'not-a-mirror')
+
+    with pytest.raises(SpackCommandError):
+        mirror('set-url', '--scope', tmp_scope,
+               'not-a-mirror', 'http://spack.io')
+
+
+def test_mirror_name_collision(tmp_scope):
+    mirror('add', '--scope', tmp_scope, 'first', '1')
+
+    with pytest.raises(SpackCommandError):
+        mirror('add', '--scope', tmp_scope, 'first', '1')
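The tmp_scope fixture is reusable by any test that needs an isolated configuration scope. A hypothetical additional test sketched under that assumption (the test name, mirror name, and URLs below are placeholders, not part of this change):

    def test_mirror_push_url_only(tmp_scope):
        # Everything written under --scope <tmp_scope> disappears when the
        # fixture's spack.config.override() context exits.
        mirror('add', '--scope', tmp_scope, 'example', 'http://example.com/mirror')
        output = mirror('set-url', '--scope', tmp_scope,
                        '--push', 'example', 's3://example-bucket')
        assert 'Changed (push) url' in output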

View file

@@ -0,0 +1,17 @@
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

import pytest

from spack.fetch_strategy import from_url_scheme


def test_fetchstrategy_bad_url_scheme():
    """Ensure that trying to make a fetch strategy from a URL with an
    unsupported scheme fails as expected."""

    with pytest.raises(ValueError):
        fetcher = from_url_scheme(  # noqa: F841
            'bogus-scheme://example.com/a/b/c')

View file

@@ -0,0 +1,29 @@
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

import pytest

import spack.fetch_strategy as spack_fs
import spack.stage as spack_stage


def test_s3fetchstrategy_sans_url():
    """Ensure constructor with no URL fails."""
    with pytest.raises(ValueError):
        spack_fs.S3FetchStrategy(None)


def test_s3fetchstrategy_bad_url(tmpdir):
    """Ensure fetch with bad URL fails as expected."""
    testpath = str(tmpdir)

    fetcher = spack_fs.S3FetchStrategy(url='file:///does-not-exist')
    assert fetcher is not None

    with spack_stage.Stage(fetcher, path=testpath) as stage:
        assert stage is not None
        assert fetcher.archive_file is None
        with pytest.raises(spack_fs.FetchError):
            fetcher.fetch()

View file

@@ -5,9 +5,12 @@
 """Tests for web.py."""
 import os

+import pytest
+
+from ordereddict_backport import OrderedDict
+
 import spack.paths
-from spack.util.web import spider, find_versions_of_archive
+import spack.util.web as web_util
 from spack.version import ver
@@ -23,7 +26,7 @@

 def test_spider_0():
-    pages, links = spider(root, depth=0)
+    pages, links = web_util.spider(root, depth=0)

     assert root in pages
     assert page_1 not in pages
@@ -41,7 +44,7 @@ def test_spider_0():

 def test_spider_1():
-    pages, links = spider(root, depth=1)
+    pages, links = web_util.spider(root, depth=1)

     assert root in pages
     assert page_1 in pages
@@ -60,7 +63,7 @@ def test_spider_1():

 def test_spider_2():
-    pages, links = spider(root, depth=2)
+    pages, links = web_util.spider(root, depth=2)

     assert root in pages
     assert page_1 in pages
@@ -81,7 +84,7 @@ def test_spider_2():

 def test_spider_3():
-    pages, links = spider(root, depth=3)
+    pages, links = web_util.spider(root, depth=3)

     assert root in pages
     assert page_1 in pages
@@ -104,31 +107,36 @@ def test_spider_3():

 def test_find_versions_of_archive_0():
-    versions = find_versions_of_archive(root_tarball, root, list_depth=0)
+    versions = web_util.find_versions_of_archive(
+        root_tarball, root, list_depth=0)
     assert ver('0.0.0') in versions


 def test_find_versions_of_archive_1():
-    versions = find_versions_of_archive(root_tarball, root, list_depth=1)
+    versions = web_util.find_versions_of_archive(
+        root_tarball, root, list_depth=1)
     assert ver('0.0.0') in versions
     assert ver('1.0.0') in versions


 def test_find_versions_of_archive_2():
-    versions = find_versions_of_archive(root_tarball, root, list_depth=2)
+    versions = web_util.find_versions_of_archive(
+        root_tarball, root, list_depth=2)
     assert ver('0.0.0') in versions
     assert ver('1.0.0') in versions
     assert ver('2.0.0') in versions


 def test_find_exotic_versions_of_archive_2():
-    versions = find_versions_of_archive(root_tarball, root, list_depth=2)
+    versions = web_util.find_versions_of_archive(
+        root_tarball, root, list_depth=2)
     # up for grabs to make this better.
     assert ver('2.0.0b2') in versions


 def test_find_versions_of_archive_3():
-    versions = find_versions_of_archive(root_tarball, root, list_depth=3)
+    versions = web_util.find_versions_of_archive(
+        root_tarball, root, list_depth=3)
     assert ver('0.0.0') in versions
     assert ver('1.0.0') in versions
     assert ver('2.0.0') in versions
@@ -137,7 +145,49 @@ def test_find_versions_of_archive_3():

 def test_find_exotic_versions_of_archive_3():
-    versions = find_versions_of_archive(root_tarball, root, list_depth=3)
+    versions = web_util.find_versions_of_archive(
+        root_tarball, root, list_depth=3)
     assert ver('2.0.0b2') in versions
     assert ver('3.0a1') in versions
     assert ver('4.5-rc5') in versions
+
+
+def test_get_header():
+    headers = {
+        'Content-type': 'text/plain'
+    }
+
+    # looking up headers should just work like a plain dict
+    # lookup when there is an entry with the right key
+    assert(web_util.get_header(headers, 'Content-type') == 'text/plain')
+
+    # looking up headers should still work if there is a fuzzy match
+    assert(web_util.get_header(headers, 'contentType') == 'text/plain')
+
+    # ...unless there is an exact match for the "fuzzy" spelling.
+    headers['contentType'] = 'text/html'
+    assert(web_util.get_header(headers, 'contentType') == 'text/html')
+
+    # If lookup has to fall back to fuzzy matching and there is more than one
+    # fuzzy match, the result depends on the internal ordering of the given
+    # mapping
+    headers = OrderedDict()
+    headers['Content-type'] = 'text/plain'
+    headers['contentType'] = 'text/html'
+
+    assert(web_util.get_header(headers, 'CONTENT_TYPE') == 'text/plain')
+    del headers['Content-type']
+    assert(web_util.get_header(headers, 'CONTENT_TYPE') == 'text/html')
+
+    # Same as above, but different ordering
+    headers = OrderedDict()
+    headers['contentType'] = 'text/html'
+    headers['Content-type'] = 'text/plain'
+
+    assert(web_util.get_header(headers, 'CONTENT_TYPE') == 'text/html')
+    del headers['contentType']
+    assert(web_util.get_header(headers, 'CONTENT_TYPE') == 'text/plain')
+
+    # If there isn't even a fuzzy match, raise KeyError
+    with pytest.raises(KeyError):
+        web_util.get_header(headers, 'ContentLength')

View file

@@ -9,6 +9,7 @@
 import itertools
 import os.path
+import re

 from six import string_types
 import six.moves.urllib.parse as urllib_parse
@@ -69,8 +70,7 @@ def parse(url, scheme='file'):
     if scheme == 'file':
         path = spack.util.path.canonicalize_path(netloc + path)
-        while path.startswith('//'):
-            path = path[1:]
+        path = re.sub(r'^/+', '/', path)
         netloc = ''

     return urllib_parse.ParseResult(scheme=scheme,
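For intuition, the regular expression collapses any run of leading slashes down to a single slash in one step, where the old loop stripped them one character at a time. A standalone illustration (the paths are examples):

    import re

    for p in ('//home/user/file', '///tmp/x', '/already/fine'):
        print(re.sub(r'^/+', '/', p))
    # -> /home/user/file   /tmp/x   /already/fine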

View file

@@ -15,9 +15,6 @@
 import sys
 import traceback

-from itertools import product
-
-import six
 from six.moves.urllib.request import urlopen, Request
 from six.moves.urllib.error import URLError
 import multiprocessing.pool
@@ -50,30 +47,6 @@ class HTMLParseError(Exception):
 # Timeout in seconds for web requests
 _timeout = 10

-# See docstring for standardize_header_names()
-_separators = ('', ' ', '_', '-')
-
-HTTP_HEADER_NAME_ALIASES = {
-    "Accept-ranges": set(
-        ''.join((A, 'ccept', sep, R, 'anges'))
-        for A, sep, R in product('Aa', _separators, 'Rr')),
-
-    "Content-length": set(
-        ''.join((C, 'ontent', sep, L, 'ength'))
-        for C, sep, L in product('Cc', _separators, 'Ll')),
-
-    "Content-type": set(
-        ''.join((C, 'ontent', sep, T, 'ype'))
-        for C, sep, T in product('Cc', _separators, 'Tt')),
-
-    "Date": set(('Date', 'date')),
-
-    "Last-modified": set(
-        ''.join((L, 'ast', sep, M, 'odified'))
-        for L, sep, M in product('Ll', _separators, 'Mm')),
-
-    "Server": set(('Server', 'server'))
-}
-
-
 class LinkParser(HTMLParser):
     """This parser just takes an HTML page and strips out the hrefs on the
@@ -173,7 +146,7 @@ def read_from_url(url, accept_content_type=None):
         req.get_method = lambda: "HEAD"
         resp = _urlopen(req, timeout=_timeout, context=context)

-        content_type = resp.headers.get('Content-type')
+        content_type = get_header(resp.headers, 'Content-type')

         # Do the real GET request when we know it's just HTML.
         req.get_method = lambda: "GET"
@@ -185,7 +158,7 @@ def read_from_url(url, accept_content_type=None):
                 ERROR=str(err)))

     if accept_content_type and not is_web_url:
-        content_type = response.headers.get('Content-type')
+        content_type = get_header(response.headers, 'Content-type')

     reject_content_type = (
         accept_content_type and (
@@ -208,9 +181,8 @@ def warn_no_ssl_cert_checking():
         "your Python to enable certificate verification.")


-def push_to_url(local_file_path, remote_path, **kwargs):
-    keep_original = kwargs.get('keep_original', True)
-
+def push_to_url(
+        local_file_path, remote_path, keep_original=True, extra_args=None):
     remote_url = url_util.parse(remote_path)
     verify_ssl = spack.config.get('config:verify_ssl')
@@ -235,7 +207,8 @@ def push_to_url(local_file_path, remote_path, **kwargs):
             os.remove(local_file_path)

     elif remote_url.scheme == 's3':
-        extra_args = kwargs.get('extra_args', {})
+        if extra_args is None:
+            extra_args = {}

         remote_path = remote_url.path
         while remote_path.startswith('/'):
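With the signature spelled out, call sites pass keep_original and extra_args explicitly instead of through **kwargs. A sketch of two hypothetical calls (the destination URLs and the ACL value are placeholders, not real mirrors):

    # Upload and keep the local copy; extra_args is only consulted for s3:// URLs.
    push_to_url('index.html', 's3://example-bucket/build_cache/index.html',
                keep_original=True, extra_args={'ACL': 'public-read'})

    # Move a file into a local mirror, discarding the original.
    push_to_url('pkg.tar.gz', 'file:///tmp/mirror/pkg.tar.gz', keep_original=False)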
@@ -296,10 +269,25 @@ def remove_url(url):
     # Don't even try for other URL schemes.


-def _list_s3_objects(client, url, num_entries, start_after=None):
+def _iter_s3_contents(contents, prefix):
+    for entry in contents:
+        key = entry['Key']
+
+        if not key.startswith('/'):
+            key = '/' + key
+
+        key = os.path.relpath(key, prefix)
+
+        if key == '.':
+            continue
+
+        yield key
+
+
+def _list_s3_objects(client, bucket, prefix, num_entries, start_after=None):
     list_args = dict(
-        Bucket=url.netloc,
-        Prefix=url.path,
+        Bucket=bucket,
+        Prefix=prefix[1:],
         MaxKeys=num_entries)

     if start_after is not None:
@@ -311,21 +299,19 @@ def _list_s3_objects(client, url, num_entries, start_after=None):
     if result['IsTruncated']:
         last_key = result['Contents'][-1]['Key']

-    iter = (key for key in
-            (
-                os.path.relpath(entry['Key'], url.path)
-                for entry in result['Contents']
-            )
-            if key != '.')
+    iter = _iter_s3_contents(result['Contents'], prefix)

     return iter, last_key


 def _iter_s3_prefix(client, url, num_entries=1024):
     key = None
+    bucket = url.netloc
+    prefix = re.sub(r'^/*', '/', url.path)
+
     while True:
         contents, key = _list_s3_objects(
-            client, url, num_entries, start_after=key)
+            client, bucket, prefix, num_entries, start_after=key)

         for x in contents:
             yield x
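The key handling in _iter_s3_contents() relies on os.path.relpath() after forcing a leading slash onto both the key and the prefix, so an entry equal to the prefix itself collapses to '.' and is skipped. A small standalone illustration (the prefix and keys are made up):

    import os.path

    prefix = '/build_cache'
    for key in ('build_cache/index.html', 'build_cache'):
        if not key.startswith('/'):
            key = '/' + key                  # S3 keys carry no leading slash
        print(os.path.relpath(key, prefix))  # 'index.html', then '.'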
@@ -577,106 +563,34 @@ def find_versions_of_archive(archive_urls, list_url=None, list_depth=0):
     return versions


-def standardize_header_names(headers):
-    """Replace certain header names with standardized spellings.
-
-    Standardizes the spellings of the following header names:
-        - Accept-ranges
-        - Content-length
-        - Content-type
-        - Date
-        - Last-modified
-        - Server
-
-    Every name considered is translated to one of the above names if the only
-    difference between the two is how the first letters of each word are
-    capitalized; whether words are separated; or, if separated, whether they
-    are so by a dash (-), underscore (_), or space ( ).  Header names that
-    cannot be mapped as described above are returned unaltered.
-
-    For example: The standard spelling of "Content-length" would be substituted
-    for any of the following names:
-        - Content-length
-        - content_length
-        - contentlength
-        - content_Length
-        - contentLength
-        - content Length
-
-    ... and any other header name, such as "Content-encoding", would not be
-    altered, regardless of spelling.
-
-    If headers is a string, then it (or an appropriate substitute) is returned.
-
-    If headers is a non-empty tuple, headers[0] is a string, and there exists a
-    standardized spelling for header[0] that differs from it, then a new tuple
-    is returned.  This tuple has the same elements as headers, except the first
-    element is the standardized spelling for headers[0].
-
-    If headers is a sequence, then a new list is considered, where each element
-    is its corresponding element in headers, but mapped as above if a string or
-    tuple.  This new list is returned if at least one of its elements differ
-    from their corrsponding element in headers.
-
-    If headers is a mapping, then a new dict is considered, where the key in
-    each item is the key of its corresponding item in headers, mapped as above
-    if a string or tuple.  The value is taken from the corresponding item.  If
-    the keys of multiple items in headers map to the same key after being
-    standardized, then the value for the resulting item is undefined.  The new
-    dict is returned if at least one of its items has a key that differs from
-    that of their corresponding item in headers, or if the keys of multiple
-    items in headers map to the same key after being standardized.
-
-    In all other cases headers is returned unaltered.
-    """
-    if isinstance(headers, six.string_types):
-        for standardized_spelling, other_spellings in (
-                HTTP_HEADER_NAME_ALIASES.items()):
-            if headers in other_spellings:
-                if headers == standardized_spelling:
-                    return headers
-                return standardized_spelling
-        return headers
-
-    if isinstance(headers, tuple):
-        if not headers:
-            return headers
-        old = headers[0]
-        if isinstance(old, six.string_types):
-            new = standardize_header_names(old)
-            if old is not new:
-                return (new,) + headers[1:]
-        return headers
-
-    try:
-        changed = False
-        new_dict = {}
-        for key, value in headers.items():
-            if isinstance(key, (tuple, six.string_types)):
-                old_key, key = key, standardize_header_names(key)
-                changed = changed or key is not old_key
-
-            new_dict[key] = value
-
-        return new_dict if changed else headers
-    except (AttributeError, TypeError, ValueError):
-        pass
-
-    try:
-        changed = False
-        new_list = []
-        for item in headers:
-            if isinstance(item, (tuple, six.string_types)):
-                old_item, item = item, standardize_header_names(item)
-                changed = changed or item is not old_item
-
-            new_list.append(item)
-
-        return new_list if changed else headers
-    except TypeError:
-        pass
-
-    return headers
+def get_header(headers, header_name):
+    """Looks up a dict of headers for the given header value.
+
+    Looks up a dict of headers, [headers], for a header value given by
+    [header_name].  Returns headers[header_name] if header_name is in headers.
+    Otherwise, the first fuzzy match is returned, if any.
+
+    This fuzzy matching is performed by discarding word separators and
+    capitalization, so that for example, "Content-length", "content_length",
+    "conTENtLength", etc., all match.  In the case of multiple fuzzy-matches,
+    the returned value is the "first" such match given the underlying mapping's
+    ordering, or unspecified if no such ordering is defined.
+
+    If header_name is not in headers, and no such fuzzy match exists, then a
+    KeyError is raised.
+    """
+
+    def unfuzz(header):
+        return re.sub(r'[ _-]', '', header).lower()
+
+    try:
+        return headers[header_name]
+    except KeyError:
+        unfuzzed_header_name = unfuzz(header_name)
+        for header, value in headers.items():
+            if unfuzz(header) == unfuzzed_header_name:
+                return value
+        raise


 class SpackWebError(spack.error.SpackError):
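A quick illustration of the lookup rules, using example header names and values:

    headers = {'Content-Length': '512'}

    get_header(headers, 'Content-Length')   # exact key -> '512'
    get_header(headers, 'content_length')   # fuzzy match -> '512'
    get_header(headers, 'Expires')          # no match -> raises KeyError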