Follow up/11117 fixes and testing (#13607)

* fix docstring in generate_package_index() referring to "public" keys as "signing" keys

* use explicit kwargs in push_to_url()

* simplify url_util.parse() per tgamblin's suggestion

* replace standardize_header_names() with the much simpler get_header()

* add some basic tests

* update s3_fetch tests

* update S3 list code to strip leading slashes from prefix

* correct minor warning regression introduced in #11117

* add more tests

* flake8 fixes

* add capsys fixture to mirror_crud test

* add get_header() tests

* use get_header() in more places

* incorporate review comments
Omar Padron 2019-12-09 17:23:33 -05:00 committed by GitHub
parent da9a562182
commit 0592c58030
10 changed files with 270 additions and 156 deletions


@@ -272,7 +272,7 @@ def generate_package_index(cache_prefix):
Creates (or replaces) the "index.html" page at the location given in
cache_prefix. This page contains a link for each binary package (*.yaml)
and signing key (*.key) under cache_prefix.
and public key (*.key) under cache_prefix.
"""
tmpdir = tempfile.mkdtemp()
try:
@@ -679,7 +679,7 @@ def get_specs(force=False):
return _cached_specs
if not spack.mirror.MirrorCollection():
tty.warn("No Spack mirrors are currently configured")
tty.debug("No Spack mirrors are currently configured")
return {}
urls = set()


@@ -1142,7 +1142,7 @@ def fetch(self):
with open(basename, 'wb') as f:
shutil.copyfileobj(stream, f)
content_type = headers['Content-type']
content_type = web_util.get_header(headers, 'Content-type')
if content_type == 'text/html':
warn_content_type_mismatch(self.archive_file or "the archive")


@@ -11,7 +11,6 @@
import spack.util.s3 as s3_util
import spack.util.url as url_util
import spack.util.web as web_util
# NOTE(opadron): Workaround issue in boto where its StreamingBody
@@ -54,8 +53,7 @@ def _s3_open(url):
# NOTE(opadron): Apply workaround here (see above)
stream = WrapStream(obj['Body'])
headers = web_util.standardize_header_names(
obj['ResponseMetadata']['HTTPHeaders'])
headers = obj['ResponseMetadata']['HTTPHeaders']
return url, headers, stream
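
A minimal sketch of why handing back the raw boto headers still works for callers (the header names and values below are illustrative; botocore generally reports HTTP header names in lower case, which the fuzzy get_header() lookup added in this commit tolerates):

import spack.util.web as web_util

# illustrative response headers as boto/botocore would report them (lower case)
headers = {'content-type': 'application/x-gzip', 'content-length': '1024'}
# callers can still ask for the canonical spelling; get_header() matches despite
# the different casing, so the dict no longer needs to be pre-normalized
assert web_util.get_header(headers, 'Content-type') == 'application/x-gzip'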


@@ -0,0 +1,41 @@
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import pytest
import os
import os.path
import spack.spec
import spack.binary_distribution
install = spack.main.SpackCommand('install')
def test_build_tarball_overwrite(
install_mockery, mock_fetch, monkeypatch, tmpdir):
with tmpdir.as_cwd():
spec = spack.spec.Spec('trivial-install-test-package').concretized()
install(str(spec))
# Runs fine the first time, throws the second time
spack.binary_distribution.build_tarball(spec, '.', unsigned=True)
with pytest.raises(spack.binary_distribution.NoOverwriteException):
spack.binary_distribution.build_tarball(spec, '.', unsigned=True)
# Should work fine with force=True
spack.binary_distribution.build_tarball(
spec, '.', force=True, unsigned=True)
# Remove the tarball and try again.
# This must *also* throw, because of the existing .spec.yaml file
os.remove(os.path.join(
spack.binary_distribution.build_cache_prefix('.'),
spack.binary_distribution.tarball_directory_name(spec),
spack.binary_distribution.tarball_name(spec, '.spack')))
with pytest.raises(spack.binary_distribution.NoOverwriteException):
spack.binary_distribution.build_tarball(spec, '.', unsigned=True)


@@ -6,7 +6,7 @@
import pytest
import os
from spack.main import SpackCommand
from spack.main import SpackCommand, SpackCommandError
import spack.environment as ev
import spack.config
@@ -16,6 +16,25 @@
concretize = SpackCommand('concretize')
@pytest.fixture
def tmp_scope():
"""Creates a temporary configuration scope"""
base_name = 'internal-testing-scope'
current_overrides = set(
x.name for x in
spack.config.config.matching_scopes(r'^{0}'.format(base_name)))
num_overrides = 0
scope_name = base_name
while scope_name in current_overrides:
scope_name = '{0}{1}'.format(base_name, num_overrides)
num_overrides += 1
with spack.config.override(spack.config.InternalConfigScope(scope_name)):
yield scope_name
@pytest.mark.disable_clean_stage_check
@pytest.mark.regression('8083')
def test_regression_8083(tmpdir, capfd, mock_packages, mock_fetch, config):
@@ -45,3 +64,49 @@ def test_mirror_from_env(tmpdir, mock_packages, mock_fetch, config,
mirror_res = os.listdir(os.path.join(mirror_dir, spec.name))
expected = ['%s.tar.gz' % spec.format('{name}-{version}')]
assert mirror_res == expected
def test_mirror_crud(tmp_scope, capsys):
with capsys.disabled():
mirror('add', '--scope', tmp_scope, 'mirror', 'http://spack.io')
output = mirror('remove', '--scope', tmp_scope, 'mirror')
assert 'Removed mirror' in output
mirror('add', '--scope', tmp_scope, 'mirror', 'http://spack.io')
# no-op
output = mirror('set-url', '--scope', tmp_scope,
'mirror', 'http://spack.io')
assert 'Url already set' in output
output = mirror('set-url', '--scope', tmp_scope,
'--push', 'mirror', 's3://spack-public')
assert 'Changed (push) url' in output
# no-op
output = mirror('set-url', '--scope', tmp_scope,
'--push', 'mirror', 's3://spack-public')
assert 'Url already set' in output
output = mirror('remove', '--scope', tmp_scope, 'mirror')
assert 'Removed mirror' in output
output = mirror('list', '--scope', tmp_scope)
assert 'No mirrors configured' in output
def test_mirror_nonexisting(tmp_scope):
with pytest.raises(SpackCommandError):
mirror('remove', '--scope', tmp_scope, 'not-a-mirror')
with pytest.raises(SpackCommandError):
mirror('set-url', '--scope', tmp_scope,
'not-a-mirror', 'http://spack.io')
def test_mirror_name_collision(tmp_scope):
mirror('add', '--scope', tmp_scope, 'first', '1')
with pytest.raises(SpackCommandError):
mirror('add', '--scope', tmp_scope, 'first', '1')


@@ -0,0 +1,17 @@
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import pytest
from spack.fetch_strategy import from_url_scheme
def test_fetchstrategy_bad_url_scheme():
"""Ensure that trying to make a fetch strategy from a URL with an
unsupported scheme fails as expected."""
with pytest.raises(ValueError):
fetcher = from_url_scheme( # noqa: F841
'bogus-scheme://example.com/a/b/c')


@@ -0,0 +1,29 @@
# Copyright 2013-2019 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import pytest
import spack.fetch_strategy as spack_fs
import spack.stage as spack_stage
def test_s3fetchstrategy_sans_url():
"""Ensure constructor with no URL fails."""
with pytest.raises(ValueError):
spack_fs.S3FetchStrategy(None)
def test_s3fetchstrategy_bad_url(tmpdir):
"""Ensure fetch with bad URL fails as expected."""
testpath = str(tmpdir)
fetcher = spack_fs.S3FetchStrategy(url='file:///does-not-exist')
assert fetcher is not None
with spack_stage.Stage(fetcher, path=testpath) as stage:
assert stage is not None
assert fetcher.archive_file is None
with pytest.raises(spack_fs.FetchError):
fetcher.fetch()


@@ -5,9 +5,12 @@
"""Tests for web.py."""
import os
import pytest
from ordereddict_backport import OrderedDict
import spack.paths
from spack.util.web import spider, find_versions_of_archive
import spack.util.web as web_util
from spack.version import ver
@@ -23,7 +26,7 @@
def test_spider_0():
pages, links = spider(root, depth=0)
pages, links = web_util.spider(root, depth=0)
assert root in pages
assert page_1 not in pages
@@ -41,7 +44,7 @@ def test_spider_0():
def test_spider_1():
pages, links = spider(root, depth=1)
pages, links = web_util.spider(root, depth=1)
assert root in pages
assert page_1 in pages
@@ -60,7 +63,7 @@ def test_spider_1():
def test_spider_2():
pages, links = spider(root, depth=2)
pages, links = web_util.spider(root, depth=2)
assert root in pages
assert page_1 in pages
@@ -81,7 +84,7 @@ def test_spider_2():
def test_spider_3():
pages, links = spider(root, depth=3)
pages, links = web_util.spider(root, depth=3)
assert root in pages
assert page_1 in pages
@@ -104,31 +107,36 @@ def test_spider_3():
def test_find_versions_of_archive_0():
versions = find_versions_of_archive(root_tarball, root, list_depth=0)
versions = web_util.find_versions_of_archive(
root_tarball, root, list_depth=0)
assert ver('0.0.0') in versions
def test_find_versions_of_archive_1():
versions = find_versions_of_archive(root_tarball, root, list_depth=1)
versions = web_util.find_versions_of_archive(
root_tarball, root, list_depth=1)
assert ver('0.0.0') in versions
assert ver('1.0.0') in versions
def test_find_versions_of_archive_2():
versions = find_versions_of_archive(root_tarball, root, list_depth=2)
versions = web_util.find_versions_of_archive(
root_tarball, root, list_depth=2)
assert ver('0.0.0') in versions
assert ver('1.0.0') in versions
assert ver('2.0.0') in versions
def test_find_exotic_versions_of_archive_2():
versions = find_versions_of_archive(root_tarball, root, list_depth=2)
versions = web_util.find_versions_of_archive(
root_tarball, root, list_depth=2)
# up for grabs to make this better.
assert ver('2.0.0b2') in versions
def test_find_versions_of_archive_3():
versions = find_versions_of_archive(root_tarball, root, list_depth=3)
versions = web_util.find_versions_of_archive(
root_tarball, root, list_depth=3)
assert ver('0.0.0') in versions
assert ver('1.0.0') in versions
assert ver('2.0.0') in versions
@@ -137,7 +145,49 @@ def test_find_versions_of_archive_3():
def test_find_exotic_versions_of_archive_3():
versions = find_versions_of_archive(root_tarball, root, list_depth=3)
versions = web_util.find_versions_of_archive(
root_tarball, root, list_depth=3)
assert ver('2.0.0b2') in versions
assert ver('3.0a1') in versions
assert ver('4.5-rc5') in versions
def test_get_header():
headers = {
'Content-type': 'text/plain'
}
# looking up headers should just work like a plain dict
# lookup when there is an entry with the right key
assert(web_util.get_header(headers, 'Content-type') == 'text/plain')
# looking up headers should still work if there is a fuzzy match
assert(web_util.get_header(headers, 'contentType') == 'text/plain')
# ...unless there is an exact match for the "fuzzy" spelling.
headers['contentType'] = 'text/html'
assert(web_util.get_header(headers, 'contentType') == 'text/html')
# If the lookup has to fall back to fuzzy matching and there is more than
# one fuzzy match, the result depends on the internal ordering of the
# given mapping
headers = OrderedDict()
headers['Content-type'] = 'text/plain'
headers['contentType'] = 'text/html'
assert(web_util.get_header(headers, 'CONTENT_TYPE') == 'text/plain')
del headers['Content-type']
assert(web_util.get_header(headers, 'CONTENT_TYPE') == 'text/html')
# Same as above, but different ordering
headers = OrderedDict()
headers['contentType'] = 'text/html'
headers['Content-type'] = 'text/plain'
assert(web_util.get_header(headers, 'CONTENT_TYPE') == 'text/html')
del headers['contentType']
assert(web_util.get_header(headers, 'CONTENT_TYPE') == 'text/plain')
# If there isn't even a fuzzy match, raise KeyError
with pytest.raises(KeyError):
web_util.get_header(headers, 'ContentLength')


@@ -9,6 +9,7 @@
import itertools
import os.path
import re
from six import string_types
import six.moves.urllib.parse as urllib_parse
@@ -69,8 +70,7 @@ def parse(url, scheme='file'):
if scheme == 'file':
path = spack.util.path.canonicalize_path(netloc + path)
while path.startswith('//'):
path = path[1:]
path = re.sub(r'^/+', '/', path)
netloc = ''
return urllib_parse.ParseResult(scheme=scheme,
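
A standalone illustration of the new one-liner (the path below is made up): the regex collapses any run of leading slashes to a single slash, which is what the old character-at-a-time while loop achieved.

import re

path = '///tmp/mirror/build_cache'
# a run of leading slashes collapses to exactly one
assert re.sub(r'^/+', '/', path) == '/tmp/mirror/build_cache'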


@@ -15,9 +15,6 @@
import sys
import traceback
from itertools import product
import six
from six.moves.urllib.request import urlopen, Request
from six.moves.urllib.error import URLError
import multiprocessing.pool
@@ -50,30 +47,6 @@ class HTMLParseError(Exception):
# Timeout in seconds for web requests
_timeout = 10
# See docstring for standardize_header_names()
_separators = ('', ' ', '_', '-')
HTTP_HEADER_NAME_ALIASES = {
"Accept-ranges": set(
''.join((A, 'ccept', sep, R, 'anges'))
for A, sep, R in product('Aa', _separators, 'Rr')),
"Content-length": set(
''.join((C, 'ontent', sep, L, 'ength'))
for C, sep, L in product('Cc', _separators, 'Ll')),
"Content-type": set(
''.join((C, 'ontent', sep, T, 'ype'))
for C, sep, T in product('Cc', _separators, 'Tt')),
"Date": set(('Date', 'date')),
"Last-modified": set(
''.join((L, 'ast', sep, M, 'odified'))
for L, sep, M in product('Ll', _separators, 'Mm')),
"Server": set(('Server', 'server'))
}
class LinkParser(HTMLParser):
"""This parser just takes an HTML page and strips out the hrefs on the
@@ -173,7 +146,7 @@ def read_from_url(url, accept_content_type=None):
req.get_method = lambda: "HEAD"
resp = _urlopen(req, timeout=_timeout, context=context)
content_type = resp.headers.get('Content-type')
content_type = get_header(resp.headers, 'Content-type')
# Do the real GET request when we know it's just HTML.
req.get_method = lambda: "GET"
@@ -185,7 +158,7 @@
ERROR=str(err)))
if accept_content_type and not is_web_url:
content_type = response.headers.get('Content-type')
content_type = get_header(response.headers, 'Content-type')
reject_content_type = (
accept_content_type and (
@@ -208,9 +181,8 @@ def warn_no_ssl_cert_checking():
"your Python to enable certificate verification.")
def push_to_url(local_file_path, remote_path, **kwargs):
keep_original = kwargs.get('keep_original', True)
def push_to_url(
local_file_path, remote_path, keep_original=True, extra_args=None):
remote_url = url_util.parse(remote_path)
verify_ssl = spack.config.get('config:verify_ssl')
@@ -235,7 +207,8 @@ def push_to_url(local_file_path, remote_path, **kwargs):
os.remove(local_file_path)
elif remote_url.scheme == 's3':
extra_args = kwargs.get('extra_args', {})
if extra_args is None:
extra_args = {}
remote_path = remote_url.path
while remote_path.startswith('/'):
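
With the keyword arguments now explicit, a call site might look like the following sketch (the file path, bucket name, and ACL value are hypothetical, and it assumes extra_args is forwarded to the underlying S3 upload):

import spack.util.web as web_util

web_util.push_to_url(
    '/tmp/foo.spack', 's3://my-mirror/build_cache/foo.spack',
    keep_original=False,                   # delete the local copy after the push
    extra_args={'ACL': 'public-read'})     # assumed to be passed through to boto3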
@@ -296,10 +269,25 @@ def remove_url(url):
# Don't even try for other URL schemes.
def _list_s3_objects(client, url, num_entries, start_after=None):
def _iter_s3_contents(contents, prefix):
for entry in contents:
key = entry['Key']
if not key.startswith('/'):
key = '/' + key
key = os.path.relpath(key, prefix)
if key == '.':
continue
yield key
def _list_s3_objects(client, bucket, prefix, num_entries, start_after=None):
list_args = dict(
Bucket=url.netloc,
Prefix=url.path,
Bucket=bucket,
Prefix=prefix[1:],
MaxKeys=num_entries)
if start_after is not None:
@@ -311,21 +299,19 @@ def _list_s3_objects(client, url, num_entries, start_after=None):
if result['IsTruncated']:
last_key = result['Contents'][-1]['Key']
iter = (key for key in
(
os.path.relpath(entry['Key'], url.path)
for entry in result['Contents']
)
if key != '.')
iter = _iter_s3_contents(result['Contents'], prefix)
return iter, last_key
def _iter_s3_prefix(client, url, num_entries=1024):
key = None
bucket = url.netloc
prefix = re.sub(r'^/*', '/', url.path)
while True:
contents, key = _list_s3_objects(
client, url, num_entries, start_after=key)
client, bucket, prefix, num_entries, start_after=key)
for x in contents:
yield x
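
A worked example of the new prefix handling (the values are illustrative): the URL path is normalized to exactly one leading slash, that slash is dropped for the S3 Prefix argument, and each returned key is reported relative to the prefix.

import os.path
import re

url_path = 'build_cache'                   # urllib may or may not include a leading '/'
prefix = re.sub(r'^/*', '/', url_path)     # -> '/build_cache'
assert prefix[1:] == 'build_cache'         # what is passed as the S3 Prefix

key = 'build_cache/linux/foo.spack'        # S3 keys come back without a leading slash
assert os.path.relpath('/' + key, prefix) == 'linux/foo.spack'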
@@ -577,106 +563,34 @@ def find_versions_of_archive(archive_urls, list_url=None, list_depth=0):
return versions
def standardize_header_names(headers):
"""Replace certain header names with standardized spellings.
def get_header(headers, header_name):
"""Looks up a dict of headers for the given header value.
Standardizes the spellings of the following header names:
- Accept-ranges
- Content-length
- Content-type
- Date
- Last-modified
- Server
Looks up a dict of headers, [headers], for a header value given by
[header_name]. Returns headers[header_name] if header_name is in headers.
Otherwise, the first fuzzy match is returned, if any.
Every name considered is translated to one of the above names if the only
difference between the two is how the first letters of each word are
capitalized; whether words are separated; or, if separated, whether they
are so by a dash (-), underscore (_), or space ( ). Header names that
cannot be mapped as described above are returned unaltered.
This fuzzy matching is performed by discarding word separators and
capitalization, so that for example, "Content-length", "content_length",
"conTENtLength", etc., all match. In the case of multiple fuzzy-matches,
the returned value is the "first" such match given the underlying mapping's
ordering, or unspecified if no such ordering is defined.
For example: The standard spelling of "Content-length" would be substituted
for any of the following names:
- Content-length
- content_length
- contentlength
- content_Length
- contentLength
- content Length
... and any other header name, such as "Content-encoding", would not be
altered, regardless of spelling.
If headers is a string, then it (or an appropriate substitute) is returned.
If headers is a non-empty tuple, headers[0] is a string, and there exists a
standardized spelling for header[0] that differs from it, then a new tuple
is returned. This tuple has the same elements as headers, except the first
element is the standardized spelling for headers[0].
If headers is a sequence, then a new list is considered, where each element
is its corresponding element in headers, but mapped as above if a string or
tuple. This new list is returned if at least one of its elements differ
from their corresponding element in headers.
If headers is a mapping, then a new dict is considered, where the key in
each item is the key of its corresponding item in headers, mapped as above
if a string or tuple. The value is taken from the corresponding item. If
the keys of multiple items in headers map to the same key after being
standardized, then the value for the resulting item is undefined. The new
dict is returned if at least one of its items has a key that differs from
that of their corresponding item in headers, or if the keys of multiple
items in headers map to the same key after being standardized.
In all other cases headers is returned unaltered.
If header_name is not in headers, and no such fuzzy match exists, then a
KeyError is raised.
"""
if isinstance(headers, six.string_types):
for standardized_spelling, other_spellings in (
HTTP_HEADER_NAME_ALIASES.items()):
if headers in other_spellings:
if headers == standardized_spelling:
return headers
return standardized_spelling
return headers
if isinstance(headers, tuple):
if not headers:
return headers
old = headers[0]
if isinstance(old, six.string_types):
new = standardize_header_names(old)
if old is not new:
return (new,) + headers[1:]
return headers
def unfuzz(header):
return re.sub(r'[ _-]', '', header).lower()
try:
changed = False
new_dict = {}
for key, value in headers.items():
if isinstance(key, (tuple, six.string_types)):
old_key, key = key, standardize_header_names(key)
changed = changed or key is not old_key
new_dict[key] = value
return new_dict if changed else headers
except (AttributeError, TypeError, ValueError):
pass
try:
changed = False
new_list = []
for item in headers:
if isinstance(item, (tuple, six.string_types)):
old_item, item = item, standardize_header_names(item)
changed = changed or item is not old_item
new_list.append(item)
return new_list if changed else headers
except TypeError:
pass
return headers
return headers[header_name]
except KeyError:
unfuzzed_header_name = unfuzz(header_name)
for header, value in headers.items():
if unfuzz(header) == unfuzzed_header_name:
return value
raise
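
For reference, a short usage sketch of the fuzzy lookup (the header names and values are illustrative):

headers = {'content_length': '512', 'Content-type': 'text/plain'}

assert get_header(headers, 'Content-type') == 'text/plain'   # exact key match
assert get_header(headers, 'Content-length') == '512'        # fuzzy match across case/separators
# a header with no exact or fuzzy match raises KeyError:
#   get_header(headers, 'Last-modified')
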
class SpackWebError(spack.error.SpackError):