Follow up/11117 fixes and testing (#13607)

* fix docstring in generate_package_index() referring to "public" keys as "signing" keys

* use explicit kwargs in push_to_url()

* simplify url_util.parse() per tgamblin's suggestion

* replace standardize_header_names() with the much simpler get_header()

* add some basic tests

* update s3_fetch tests

* update S3 list code to strip leading slashes from prefix

* correct minor warning regression introduced in #11117

* add more tests

* flake8 fixes

* add capsys fixture to mirror_crud test

* add get_header() tests

* use get_header() in more places

* incorporate review comments
Omar Padron 2019-12-09 17:23:33 -05:00 committed by GitHub
parent da9a562182
commit 0592c58030
10 changed files with 270 additions and 156 deletions

View file

@@ -272,7 +272,7 @@ def generate_package_index(cache_prefix):
     Creates (or replaces) the "index.html" page at the location given in
     cache_prefix. This page contains a link for each binary package (*.yaml)
-    and signing key (*.key) under cache_prefix.
+    and public key (*.key) under cache_prefix.
     """
     tmpdir = tempfile.mkdtemp()
     try:
@@ -679,7 +679,7 @@ def get_specs(force=False):
         return _cached_specs

     if not spack.mirror.MirrorCollection():
-        tty.warn("No Spack mirrors are currently configured")
+        tty.debug("No Spack mirrors are currently configured")
         return {}

     urls = set()

View file

@@ -1142,7 +1142,7 @@ def fetch(self):
             with open(basename, 'wb') as f:
                 shutil.copyfileobj(stream, f)

-            content_type = headers['Content-type']
+            content_type = web_util.get_header(headers, 'Content-type')

         if content_type == 'text/html':
             warn_content_type_mismatch(self.archive_file or "the archive")

View file

@@ -11,7 +11,6 @@
 import spack.util.s3 as s3_util
 import spack.util.url as url_util
-import spack.util.web as web_util


 # NOTE(opadron): Workaround issue in boto where its StreamingBody
@@ -54,8 +53,7 @@ def _s3_open(url):
     # NOTE(opadron): Apply workaround here (see above)
     stream = WrapStream(obj['Body'])
-    headers = web_util.standardize_header_names(
-        obj['ResponseMetadata']['HTTPHeaders'])
+    headers = obj['ResponseMetadata']['HTTPHeaders']

     return url, headers, stream
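Since _s3_open() now hands back the raw HTTPHeaders mapping untouched, callers are expected to tolerate whatever capitalization the S3 endpoint uses. A minimal sketch of that pattern, with made-up header names and values:

    import spack.util.web as web_util

    # Raw headers roughly as a botocore response might report them (example values).
    headers = {'x-amz-request-id': 'ABC123', 'content-type': 'text/html'}

    # get_header() still finds the value despite the lowercase spelling.
    content_type = web_util.get_header(headers, 'Content-type')  # -> 'text/html'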

View file

@@ -0,0 +1,41 @@
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

import os
import os.path

import pytest

import spack.main
import spack.spec
import spack.binary_distribution

install = spack.main.SpackCommand('install')


def test_build_tarball_overwrite(
        install_mockery, mock_fetch, monkeypatch, tmpdir):
    with tmpdir.as_cwd():
        spec = spack.spec.Spec('trivial-install-test-package').concretized()
        install(str(spec))

        # Runs fine the first time, throws the second time
        spack.binary_distribution.build_tarball(spec, '.', unsigned=True)
        with pytest.raises(spack.binary_distribution.NoOverwriteException):
            spack.binary_distribution.build_tarball(spec, '.', unsigned=True)

        # Should work fine with force=True
        spack.binary_distribution.build_tarball(
            spec, '.', force=True, unsigned=True)

        # Remove the tarball and try again.
        # This must *also* throw, because of the existing .spec.yaml file
        os.remove(os.path.join(
            spack.binary_distribution.build_cache_prefix('.'),
            spack.binary_distribution.tarball_directory_name(spec),
            spack.binary_distribution.tarball_name(spec, '.spack')))

        with pytest.raises(spack.binary_distribution.NoOverwriteException):
            spack.binary_distribution.build_tarball(spec, '.', unsigned=True)

View file

@@ -6,7 +6,7 @@
 import pytest
 import os

-from spack.main import SpackCommand
+from spack.main import SpackCommand, SpackCommandError
 import spack.environment as ev

 import spack.config
@@ -16,6 +16,25 @@
 concretize = SpackCommand('concretize')


+@pytest.fixture
+def tmp_scope():
+    """Creates a temporary configuration scope"""
+    base_name = 'internal-testing-scope'
+    current_overrides = set(
+        x.name for x in
+        spack.config.config.matching_scopes(r'^{0}'.format(base_name)))
+
+    num_overrides = 0
+    scope_name = base_name
+    while scope_name in current_overrides:
+        scope_name = '{0}{1}'.format(base_name, num_overrides)
+        num_overrides += 1
+
+    with spack.config.override(spack.config.InternalConfigScope(scope_name)):
+        yield scope_name
+
+
 @pytest.mark.disable_clean_stage_check
 @pytest.mark.regression('8083')
 def test_regression_8083(tmpdir, capfd, mock_packages, mock_fetch, config):
@@ -45,3 +64,49 @@ def test_mirror_from_env(tmpdir, mock_packages, mock_fetch, config,
     mirror_res = os.listdir(os.path.join(mirror_dir, spec.name))
     expected = ['%s.tar.gz' % spec.format('{name}-{version}')]
     assert mirror_res == expected
+
+
+def test_mirror_crud(tmp_scope, capsys):
+    with capsys.disabled():
+        mirror('add', '--scope', tmp_scope, 'mirror', 'http://spack.io')
+
+        output = mirror('remove', '--scope', tmp_scope, 'mirror')
+        assert 'Removed mirror' in output
+
+        mirror('add', '--scope', tmp_scope, 'mirror', 'http://spack.io')
+
+        # no-op
+        output = mirror('set-url', '--scope', tmp_scope,
+                        'mirror', 'http://spack.io')
+        assert 'Url already set' in output
+
+        output = mirror('set-url', '--scope', tmp_scope,
+                        '--push', 'mirror', 's3://spack-public')
+        assert 'Changed (push) url' in output
+
+        # no-op
+        output = mirror('set-url', '--scope', tmp_scope,
+                        '--push', 'mirror', 's3://spack-public')
+        assert 'Url already set' in output
+
+        output = mirror('remove', '--scope', tmp_scope, 'mirror')
+        assert 'Removed mirror' in output
+
+        output = mirror('list', '--scope', tmp_scope)
+        assert 'No mirrors configured' in output
+
+
+def test_mirror_nonexisting(tmp_scope):
+    with pytest.raises(SpackCommandError):
+        mirror('remove', '--scope', tmp_scope, 'not-a-mirror')
+
+    with pytest.raises(SpackCommandError):
+        mirror('set-url', '--scope', tmp_scope,
+               'not-a-mirror', 'http://spack.io')
+
+
+def test_mirror_name_collision(tmp_scope):
+    mirror('add', '--scope', tmp_scope, 'first', '1')
+
+    with pytest.raises(SpackCommandError):
+        mirror('add', '--scope', tmp_scope, 'first', '1')
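The tmp_scope fixture is reusable by any test that needs an isolated configuration scope. A hypothetical additional test sketched under that assumption (the test name, mirror name, and URLs below are placeholders, not part of this change):

    def test_mirror_push_url_only(tmp_scope):
        # Everything written under --scope <tmp_scope> disappears when the
        # fixture's spack.config.override() context exits.
        mirror('add', '--scope', tmp_scope, 'example', 'http://example.com/mirror')
        output = mirror('set-url', '--scope', tmp_scope,
                        '--push', 'example', 's3://example-bucket')
        assert 'Changed (push) url' in output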

View file

@@ -0,0 +1,17 @@
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

import pytest

from spack.fetch_strategy import from_url_scheme


def test_fetchstrategy_bad_url_scheme():
    """Ensure that trying to make a fetch strategy from a URL with an
    unsupported scheme fails as expected."""

    with pytest.raises(ValueError):
        fetcher = from_url_scheme(  # noqa: F841
            'bogus-scheme://example.com/a/b/c')

View file

@@ -0,0 +1,29 @@
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

import pytest

import spack.fetch_strategy as spack_fs
import spack.stage as spack_stage


def test_s3fetchstrategy_sans_url():
    """Ensure constructor with no URL fails."""
    with pytest.raises(ValueError):
        spack_fs.S3FetchStrategy(None)


def test_s3fetchstrategy_bad_url(tmpdir):
    """Ensure fetch with bad URL fails as expected."""
    testpath = str(tmpdir)

    fetcher = spack_fs.S3FetchStrategy(url='file:///does-not-exist')
    assert fetcher is not None

    with spack_stage.Stage(fetcher, path=testpath) as stage:
        assert stage is not None
        assert fetcher.archive_file is None
        with pytest.raises(spack_fs.FetchError):
            fetcher.fetch()

View file

@@ -5,9 +5,12 @@
 """Tests for web.py."""
 import os

+import pytest
+
+from ordereddict_backport import OrderedDict
+
 import spack.paths
-from spack.util.web import spider, find_versions_of_archive
+import spack.util.web as web_util
 from spack.version import ver
@@ -23,7 +26,7 @@

 def test_spider_0():
-    pages, links = spider(root, depth=0)
+    pages, links = web_util.spider(root, depth=0)

     assert root in pages
     assert page_1 not in pages
@@ -41,7 +44,7 @@ def test_spider_0():

 def test_spider_1():
-    pages, links = spider(root, depth=1)
+    pages, links = web_util.spider(root, depth=1)

     assert root in pages
     assert page_1 in pages
@@ -60,7 +63,7 @@ def test_spider_1():

 def test_spider_2():
-    pages, links = spider(root, depth=2)
+    pages, links = web_util.spider(root, depth=2)

     assert root in pages
     assert page_1 in pages
@@ -81,7 +84,7 @@ def test_spider_2():

 def test_spider_3():
-    pages, links = spider(root, depth=3)
+    pages, links = web_util.spider(root, depth=3)

     assert root in pages
     assert page_1 in pages
@@ -104,31 +107,36 @@ def test_spider_3():

 def test_find_versions_of_archive_0():
-    versions = find_versions_of_archive(root_tarball, root, list_depth=0)
+    versions = web_util.find_versions_of_archive(
+        root_tarball, root, list_depth=0)
     assert ver('0.0.0') in versions


 def test_find_versions_of_archive_1():
-    versions = find_versions_of_archive(root_tarball, root, list_depth=1)
+    versions = web_util.find_versions_of_archive(
+        root_tarball, root, list_depth=1)
     assert ver('0.0.0') in versions
     assert ver('1.0.0') in versions


 def test_find_versions_of_archive_2():
-    versions = find_versions_of_archive(root_tarball, root, list_depth=2)
+    versions = web_util.find_versions_of_archive(
+        root_tarball, root, list_depth=2)
     assert ver('0.0.0') in versions
     assert ver('1.0.0') in versions
     assert ver('2.0.0') in versions


 def test_find_exotic_versions_of_archive_2():
-    versions = find_versions_of_archive(root_tarball, root, list_depth=2)
+    versions = web_util.find_versions_of_archive(
+        root_tarball, root, list_depth=2)
     # up for grabs to make this better.
     assert ver('2.0.0b2') in versions


 def test_find_versions_of_archive_3():
-    versions = find_versions_of_archive(root_tarball, root, list_depth=3)
+    versions = web_util.find_versions_of_archive(
+        root_tarball, root, list_depth=3)
     assert ver('0.0.0') in versions
     assert ver('1.0.0') in versions
     assert ver('2.0.0') in versions
@@ -137,7 +145,49 @@ def test_find_versions_of_archive_3():

 def test_find_exotic_versions_of_archive_3():
-    versions = find_versions_of_archive(root_tarball, root, list_depth=3)
+    versions = web_util.find_versions_of_archive(
+        root_tarball, root, list_depth=3)
     assert ver('2.0.0b2') in versions
     assert ver('3.0a1') in versions
     assert ver('4.5-rc5') in versions
+
+
+def test_get_header():
+    headers = {
+        'Content-type': 'text/plain'
+    }
+
+    # looking up headers should just work like a plain dict
+    # lookup when there is an entry with the right key
+    assert(web_util.get_header(headers, 'Content-type') == 'text/plain')
+
+    # looking up headers should still work if there is a fuzzy match
+    assert(web_util.get_header(headers, 'contentType') == 'text/plain')
+
+    # ...unless there is an exact match for the "fuzzy" spelling.
+    headers['contentType'] = 'text/html'
+    assert(web_util.get_header(headers, 'contentType') == 'text/html')
+
+    # If lookup has to fall back to fuzzy matching and there is more than one
+    # fuzzy match, the result depends on the internal ordering of the given
+    # mapping
+    headers = OrderedDict()
+    headers['Content-type'] = 'text/plain'
+    headers['contentType'] = 'text/html'
+
+    assert(web_util.get_header(headers, 'CONTENT_TYPE') == 'text/plain')
+    del headers['Content-type']
+    assert(web_util.get_header(headers, 'CONTENT_TYPE') == 'text/html')
+
+    # Same as above, but different ordering
+    headers = OrderedDict()
+    headers['contentType'] = 'text/html'
+    headers['Content-type'] = 'text/plain'
+
+    assert(web_util.get_header(headers, 'CONTENT_TYPE') == 'text/html')
+    del headers['contentType']
+    assert(web_util.get_header(headers, 'CONTENT_TYPE') == 'text/plain')
+
+    # If there isn't even a fuzzy match, raise KeyError
+    with pytest.raises(KeyError):
+        web_util.get_header(headers, 'ContentLength')

View file

@@ -9,6 +9,7 @@
 import itertools
 import os.path
+import re

 from six import string_types
 import six.moves.urllib.parse as urllib_parse
@@ -69,8 +70,7 @@ def parse(url, scheme='file'):
     if scheme == 'file':
         path = spack.util.path.canonicalize_path(netloc + path)
-        while path.startswith('//'):
-            path = path[1:]
+        path = re.sub(r'^/+', '/', path)
         netloc = ''

     return urllib_parse.ParseResult(scheme=scheme,
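For intuition, the regular expression collapses any run of leading slashes down to a single slash in one step, where the old loop stripped them one character at a time. A standalone illustration (the paths are examples):

    import re

    for p in ('//home/user/file', '///tmp/x', '/already/fine'):
        print(re.sub(r'^/+', '/', p))
    # -> /home/user/file   /tmp/x   /already/fine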

View file

@@ -15,9 +15,6 @@
 import sys
 import traceback

-from itertools import product
-
-import six
 from six.moves.urllib.request import urlopen, Request
 from six.moves.urllib.error import URLError
 import multiprocessing.pool
@@ -50,30 +47,6 @@ class HTMLParseError(Exception):
 # Timeout in seconds for web requests
 _timeout = 10

-# See docstring for standardize_header_names()
-_separators = ('', ' ', '_', '-')
-
-HTTP_HEADER_NAME_ALIASES = {
-    "Accept-ranges": set(
-        ''.join((A, 'ccept', sep, R, 'anges'))
-        for A, sep, R in product('Aa', _separators, 'Rr')),
-
-    "Content-length": set(
-        ''.join((C, 'ontent', sep, L, 'ength'))
-        for C, sep, L in product('Cc', _separators, 'Ll')),
-
-    "Content-type": set(
-        ''.join((C, 'ontent', sep, T, 'ype'))
-        for C, sep, T in product('Cc', _separators, 'Tt')),
-
-    "Date": set(('Date', 'date')),
-
-    "Last-modified": set(
-        ''.join((L, 'ast', sep, M, 'odified'))
-        for L, sep, M in product('Ll', _separators, 'Mm')),
-
-    "Server": set(('Server', 'server'))
-}
-
-
 class LinkParser(HTMLParser):
     """This parser just takes an HTML page and strips out the hrefs on the
@@ -173,7 +146,7 @@ def read_from_url(url, accept_content_type=None):
         req.get_method = lambda: "HEAD"
         resp = _urlopen(req, timeout=_timeout, context=context)

-        content_type = resp.headers.get('Content-type')
+        content_type = get_header(resp.headers, 'Content-type')

         # Do the real GET request when we know it's just HTML.
         req.get_method = lambda: "GET"
@@ -185,7 +158,7 @@ def read_from_url(url, accept_content_type=None):
                 ERROR=str(err)))

     if accept_content_type and not is_web_url:
-        content_type = response.headers.get('Content-type')
+        content_type = get_header(response.headers, 'Content-type')

     reject_content_type = (
         accept_content_type and (
@@ -208,9 +181,8 @@ def warn_no_ssl_cert_checking():
         "your Python to enable certificate verification.")


-def push_to_url(local_file_path, remote_path, **kwargs):
-    keep_original = kwargs.get('keep_original', True)
-
+def push_to_url(
+        local_file_path, remote_path, keep_original=True, extra_args=None):
     remote_url = url_util.parse(remote_path)
     verify_ssl = spack.config.get('config:verify_ssl')
@@ -235,7 +207,8 @@ def push_to_url(local_file_path, remote_path, **kwargs):
             os.remove(local_file_path)

     elif remote_url.scheme == 's3':
-        extra_args = kwargs.get('extra_args', {})
+        if extra_args is None:
+            extra_args = {}

         remote_path = remote_url.path
         while remote_path.startswith('/'):
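With the signature spelled out, call sites pass keep_original and extra_args explicitly instead of through **kwargs. A sketch of two hypothetical calls (the destination URLs and the ACL value are placeholders, not real mirrors):

    # Upload and keep the local copy; extra_args is only consulted for s3:// URLs.
    push_to_url('index.html', 's3://example-bucket/build_cache/index.html',
                keep_original=True, extra_args={'ACL': 'public-read'})

    # Move a file into a local mirror, discarding the original.
    push_to_url('pkg.tar.gz', 'file:///tmp/mirror/pkg.tar.gz', keep_original=False)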
@@ -296,10 +269,25 @@ def remove_url(url):
     # Don't even try for other URL schemes.


-def _list_s3_objects(client, url, num_entries, start_after=None):
+def _iter_s3_contents(contents, prefix):
+    for entry in contents:
+        key = entry['Key']
+
+        if not key.startswith('/'):
+            key = '/' + key
+
+        key = os.path.relpath(key, prefix)
+
+        if key == '.':
+            continue
+
+        yield key
+
+
+def _list_s3_objects(client, bucket, prefix, num_entries, start_after=None):
     list_args = dict(
-        Bucket=url.netloc,
-        Prefix=url.path,
+        Bucket=bucket,
+        Prefix=prefix[1:],
         MaxKeys=num_entries)

     if start_after is not None:
@@ -311,21 +299,19 @@ def _list_s3_objects(client, url, num_entries, start_after=None):
     if result['IsTruncated']:
         last_key = result['Contents'][-1]['Key']

-    iter = (key for key in
-            (
-                os.path.relpath(entry['Key'], url.path)
-                for entry in result['Contents']
-            )
-            if key != '.')
+    iter = _iter_s3_contents(result['Contents'], prefix)

     return iter, last_key


 def _iter_s3_prefix(client, url, num_entries=1024):
     key = None
+    bucket = url.netloc
+    prefix = re.sub(r'^/*', '/', url.path)
+
     while True:
         contents, key = _list_s3_objects(
-            client, url, num_entries, start_after=key)
+            client, bucket, prefix, num_entries, start_after=key)

         for x in contents:
             yield x
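The key handling in _iter_s3_contents() relies on os.path.relpath() after forcing a leading slash onto both the key and the prefix, so an entry equal to the prefix itself collapses to '.' and is skipped. A small standalone illustration (the prefix and keys are made up):

    import os.path

    prefix = '/build_cache'
    for key in ('build_cache/index.html', 'build_cache'):
        if not key.startswith('/'):
            key = '/' + key                  # S3 keys carry no leading slash
        print(os.path.relpath(key, prefix))  # 'index.html', then '.'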
@@ -577,106 +563,34 @@ def find_versions_of_archive(archive_urls, list_url=None, list_depth=0):
     return versions


-def standardize_header_names(headers):
-    """Replace certain header names with standardized spellings.
-
-    Standardizes the spellings of the following header names:
-        - Accept-ranges
-        - Content-length
-        - Content-type
-        - Date
-        - Last-modified
-        - Server
-
-    Every name considered is translated to one of the above names if the only
-    difference between the two is how the first letters of each word are
-    capitalized; whether words are separated; or, if separated, whether they
-    are so by a dash (-), underscore (_), or space ( ).  Header names that
-    cannot be mapped as described above are returned unaltered.
-
-    For example: The standard spelling of "Content-length" would be substituted
-    for any of the following names:
-        - Content-length
-        - content_length
-        - contentlength
-        - content_Length
-        - contentLength
-        - content Length
-
-    ... and any other header name, such as "Content-encoding", would not be
-    altered, regardless of spelling.
-
-    If headers is a string, then it (or an appropriate substitute) is returned.
-
-    If headers is a non-empty tuple, headers[0] is a string, and there exists a
-    standardized spelling for header[0] that differs from it, then a new tuple
-    is returned.  This tuple has the same elements as headers, except the first
-    element is the standardized spelling for headers[0].
-
-    If headers is a sequence, then a new list is considered, where each element
-    is its corresponding element in headers, but mapped as above if a string or
-    tuple.  This new list is returned if at least one of its elements differ
-    from their corrsponding element in headers.
-
-    If headers is a mapping, then a new dict is considered, where the key in
-    each item is the key of its corresponding item in headers, mapped as above
-    if a string or tuple.  The value is taken from the corresponding item.  If
-    the keys of multiple items in headers map to the same key after being
-    standardized, then the value for the resulting item is undefined.  The new
-    dict is returned if at least one of its items has a key that differs from
-    that of their corresponding item in headers, or if the keys of multiple
-    items in headers map to the same key after being standardized.
-
-    In all other cases headers is returned unaltered.
-    """
-    if isinstance(headers, six.string_types):
-        for standardized_spelling, other_spellings in (
-                HTTP_HEADER_NAME_ALIASES.items()):
-            if headers in other_spellings:
-                if headers == standardized_spelling:
-                    return headers
-                return standardized_spelling
-        return headers
-
-    if isinstance(headers, tuple):
-        if not headers:
-            return headers
-        old = headers[0]
-        if isinstance(old, six.string_types):
-            new = standardize_header_names(old)
-            if old is not new:
-                return (new,) + headers[1:]
-        return headers
-
-    try:
-        changed = False
-        new_dict = {}
-        for key, value in headers.items():
-            if isinstance(key, (tuple, six.string_types)):
-                old_key, key = key, standardize_header_names(key)
-                changed = changed or key is not old_key
-
-            new_dict[key] = value
-
-        return new_dict if changed else headers
-    except (AttributeError, TypeError, ValueError):
-        pass
-
-    try:
-        changed = False
-        new_list = []
-        for item in headers:
-            if isinstance(item, (tuple, six.string_types)):
-                old_item, item = item, standardize_header_names(item)
-                changed = changed or item is not old_item
-
-            new_list.append(item)
-
-        return new_list if changed else headers
-    except TypeError:
-        pass
-
-    return headers
+def get_header(headers, header_name):
+    """Looks up a dict of headers for the given header value.
+
+    Looks up a dict of headers, [headers], for a header value given by
+    [header_name].  Returns headers[header_name] if header_name is in headers.
+    Otherwise, the first fuzzy match is returned, if any.
+
+    This fuzzy matching is performed by discarding word separators and
+    capitalization, so that for example, "Content-length", "content_length",
+    "conTENtLength", etc., all match.  In the case of multiple fuzzy-matches,
+    the returned value is the "first" such match given the underlying mapping's
+    ordering, or unspecified if no such ordering is defined.
+
+    If header_name is not in headers, and no such fuzzy match exists, then a
+    KeyError is raised.
+    """
+
+    def unfuzz(header):
+        return re.sub(r'[ _-]', '', header).lower()
+
+    try:
+        return headers[header_name]
+    except KeyError:
+        unfuzzed_header_name = unfuzz(header_name)
+        for header, value in headers.items():
+            if unfuzz(header) == unfuzzed_header_name:
+                return value
+        raise


 class SpackWebError(spack.error.SpackError):
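A quick illustration of the lookup rules, using example header names and values:

    headers = {'Content-Length': '512'}

    get_header(headers, 'Content-Length')   # exact key -> '512'
    get_header(headers, 'content_length')   # fuzzy match -> '512'
    get_header(headers, 'Expires')          # no match -> raises KeyError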