From d7596125231e800ca41c60e379be2b8abb47d20d Mon Sep 17 00:00:00 2001 From: Joseph Snyder Date: Fri, 19 Nov 2021 15:28:34 -0500 Subject: [PATCH] Add connection specification to mirror creation (#24244) * Add connection specification to mirror creation This allows each mirror to contain information about the credentials used to access it. Update command and tests based on comments Switch to only "long form" flags for the s3 connection information. Use the "any" function instead of checking for an empty list when looking for s3 connection information. Split test to use the access token separately from the access id and key. Use long flag form in test. Add endpoint_url to available S3 options. Extend the special parameters for an S3 mirror to accept the endpoint_url parameter. Add a test. * Add connection information per URL not per mirror Expand the mirror-based connection information to be per-URL. This will allow a user to specify different S3 connection information for both the fetch and the push URLs. Add a parameter for "profile", another way of storing the id/secret pair. * Switch from "access_profile" to "profile" --- lib/spack/spack/cmd/common/arguments.py | 19 +++++ lib/spack/spack/cmd/mirror.py | 43 ++++++++---- lib/spack/spack/fetch_strategy.py | 8 ++- lib/spack/spack/mirror.py | 93 ++++++++++++++++++++++--- lib/spack/spack/s3_handler.py | 3 +- lib/spack/spack/schema/mirrors.py | 4 +- lib/spack/spack/stage.py | 20 ++++-- lib/spack/spack/test/cmd/mirror.py | 29 +++++++- lib/spack/spack/test/web.py | 4 +- lib/spack/spack/util/s3.py | 26 ++++++- lib/spack/spack/util/web.py | 10 ++- share/spack/spack-completion.bash | 4 +- 12 files changed, 219 insertions(+), 44 deletions(-) diff --git a/lib/spack/spack/cmd/common/arguments.py b/lib/spack/spack/cmd/common/arguments.py index bea8ccea90..0aa30a15ca 100644 --- a/lib/spack/spack/cmd/common/arguments.py +++ b/lib/spack/spack/cmd/common/arguments.py @@ -328,3 +328,22 @@ def reuse(): '--reuse', action='store_true', default=False, help='reuse installed dependencies' ) + + +def add_s3_connection_args(subparser, add_help): + subparser.add_argument( + '--s3-access-key-id', + help="ID string to use to connect to this S3 mirror") + subparser.add_argument( + '--s3-access-key-secret', + help="Secret string to use to connect to this S3 mirror") + subparser.add_argument( + '--s3-access-token', + help="Access Token to use to connect to this S3 mirror") + subparser.add_argument( + '--s3-profile', + help="S3 profile name to use to connect to this S3 mirror", + default=None) + subparser.add_argument( + '--s3-endpoint-url', + help="Access Token to use to connect to this S3 mirror") diff --git a/lib/spack/spack/cmd/mirror.py b/lib/spack/spack/cmd/mirror.py index fa202f09f0..a9e51f019d 100644 --- a/lib/spack/spack/cmd/mirror.py +++ b/lib/spack/spack/cmd/mirror.py @@ -6,7 +6,6 @@ import sys import llnl.util.tty as tty -from llnl.util.tty.colify import colify import spack.cmd import spack.cmd.common.arguments as arguments @@ -93,7 +92,7 @@ def setup_parser(subparser): '--scope', choices=scopes, metavar=scopes_metavar, default=spack.config.default_modify_scope(), help="configuration scope to modify") - + arguments.add_s3_connection_args(add_parser, False) # Remove remove_parser = sp.add_parser('remove', aliases=['rm'], help=mirror_remove.__doc__) @@ -117,6 +116,7 @@ def setup_parser(subparser): '--scope', choices=scopes, metavar=scopes_metavar, default=spack.config.default_modify_scope(), help="configuration scope to modify") + arguments.add_s3_connection_args(set_url_parser, False) # List list_parser = sp.add_parser('list', help=mirror_list.__doc__) @@ -129,7 +129,7 @@ def setup_parser(subparser): def mirror_add(args): """Add a mirror to Spack.""" url = url_util.format(args.url) - spack.mirror.add(args.name, url, args.scope) + spack.mirror.add(args.name, url, args.scope, args) def mirror_remove(args): @@ -140,7 +140,6 @@ def mirror_remove(args): def mirror_set_url(args): """Change the URL of a mirror.""" url = url_util.format(args.url) - mirrors = spack.config.get('mirrors', scope=args.scope) if not mirrors: mirrors = syaml_dict() @@ -149,7 +148,15 @@ def mirror_set_url(args): tty.die("No mirror found with name %s." % args.name) entry = mirrors[args.name] + key_values = ["s3_access_key_id", "s3_access_token", "s3_profile"] + if any(value for value in key_values if value in args): + incoming_data = {"url": url, + "access_pair": (args.s3_access_key_id, + args.s3_access_key_secret), + "access_token": args.s3_access_token, + "profile": args.s3_profile, + "endpoint_url": args.s3_endpoint_url} try: fetch_url = entry['fetch'] push_url = entry['push'] @@ -159,20 +166,28 @@ def mirror_set_url(args): changes_made = False if args.push: - changes_made = changes_made or push_url != url - push_url = url + if isinstance(push_url, dict): + changes_made = changes_made or push_url != incoming_data + push_url = incoming_data + else: + changes_made = changes_made or push_url != url + push_url = url else: - changes_made = ( - changes_made or fetch_url != push_url or push_url != url) - - fetch_url, push_url = url, url + if isinstance(push_url, dict): + changes_made = (changes_made or push_url != incoming_data + or push_url != incoming_data) + fetch_url, push_url = incoming_data, incoming_data + else: + changes_made = changes_made or push_url != url + fetch_url, push_url = url, url items = [ ( (n, u) if n != args.name else ( (n, {"fetch": fetch_url, "push": push_url}) - if fetch_url != push_url else (n, fetch_url) + if fetch_url != push_url else (n, {"fetch": fetch_url, + "push": fetch_url}) ) ) for n, u in mirrors.items() @@ -183,10 +198,10 @@ def mirror_set_url(args): if changes_made: tty.msg( - "Changed%s url for mirror %s." % + "Changed%s url or connection information for mirror %s." % ((" (push)" if args.push else ""), args.name)) else: - tty.msg("Url already set for mirror %s." % args.name) + tty.msg("No changes made to mirror %s." % args.name) def mirror_list(args): @@ -330,7 +345,7 @@ def mirror_create(args): " %-4d failed to fetch." % e) if error: tty.error("Failed downloads:") - colify(s.cformat("{name}{@version}") for s in error) + tty.colify(s.cformat("{name}{@version}") for s in error) sys.exit(1) diff --git a/lib/spack/spack/fetch_strategy.py b/lib/spack/spack/fetch_strategy.py index 888cad7bf3..432010adca 100644 --- a/lib/spack/spack/fetch_strategy.py +++ b/lib/spack/spack/fetch_strategy.py @@ -1437,7 +1437,13 @@ def fetch(self): basename = os.path.basename(parsed_url.path) with working_dir(self.stage.path): - _, headers, stream = web_util.read_from_url(self.url) + import spack.util.s3 as s3_util + s3 = s3_util.create_s3_session(self.url, + connection=s3_util.get_mirror_connection(parsed_url), url_type="fetch") # noqa: E501 + + headers = s3.get_object(Bucket=parsed_url.netloc, + Key=parsed_url.path.lstrip("/")) + stream = headers["Body"] with open(basename, 'wb') as f: shutil.copyfileobj(stream, f) diff --git a/lib/spack/spack/mirror.py b/lib/spack/spack/mirror.py index a4ec57ab5b..2d1a3e819e 100644 --- a/lib/spack/spack/mirror.py +++ b/lib/spack/spack/mirror.py @@ -41,6 +41,10 @@ from spack.version import VersionList +def _is_string(url): + return isinstance(url, six.string_types) + + def _display_mirror_entry(size, name, url, type_=None): if type_: type_ = "".join((" (", type_, ")")) @@ -59,7 +63,8 @@ class Mirror(object): to them. These two URLs are usually the same. """ - def __init__(self, fetch_url, push_url=None, name=None): + def __init__(self, fetch_url, push_url=None, + name=None): self._fetch_url = fetch_url self._push_url = push_url self._name = name @@ -96,7 +101,7 @@ def from_dict(d, name=None): if isinstance(d, six.string_types): return Mirror(d, name=name) else: - return Mirror(d['fetch'], d['push'], name) + return Mirror(d['fetch'], d['push'], name=name) def display(self, max_len=0): if self._push_url is None: @@ -137,24 +142,83 @@ def __repr__(self): def name(self): return self._name or "" + def get_profile(self, url_type): + if isinstance(self._fetch_url, dict): + if url_type == "push": + return self._push_url['profile'] + return self._fetch_url['profile'] + else: + return None + + def set_profile(self, url_type, profile): + if url_type == "push": + self._push_url["profile"] = profile + else: + self._fetch_url["profile"] = profile + + def get_access_pair(self, url_type): + if isinstance(self._fetch_url, dict): + if url_type == "push": + return self._push_url['access_pair'] + return self._fetch_url['access_pair'] + else: + return None + + def set_access_pair(self, url_type, connection_tuple): + if url_type == "push": + self._push_url["access_pair"] = connection_tuple + else: + self._fetch_url["access_pair"] = connection_tuple + + def get_endpoint_url(self, url_type): + if isinstance(self._fetch_url, dict): + if url_type == "push": + return self._push_url['endpoint_url'] + return self._fetch_url['endpoint_url'] + else: + return None + + def set_endpoint_url(self, url_type, url): + if url_type == "push": + self._push_url["endpoint_url"] = url + else: + self._fetch_url["endpoint_url"] = url + + def get_access_token(self, url_type): + if isinstance(self._fetch_url, dict): + if url_type == "push": + return self._push_url['access_token'] + return self._fetch_url['access_token'] + else: + return None + + def set_access_token(self, url_type, connection_token): + if url_type == "push": + self._push_url["access_token"] = connection_token + else: + self._fetch_url["access_token"] = connection_token + @property def fetch_url(self): - return self._fetch_url + return self._fetch_url if _is_string(self._fetch_url) \ + else self._fetch_url["url"] @fetch_url.setter def fetch_url(self, url): - self._fetch_url = url + self._fetch_url["url"] = url self._normalize() @property def push_url(self): if self._push_url is None: - return self._fetch_url - return self._push_url + return self._fetch_url if _is_string(self._fetch_url) \ + else self._fetch_url["url"] + return self._push_url if _is_string(self._push_url) \ + else self._push_url["url"] @push_url.setter def push_url(self, url): - self._push_url = url + self._push_url["url"] = url self._normalize() def _normalize(self): @@ -453,7 +517,7 @@ def create(path, specs, skip_unstable_versions=False): return mirror_stats.stats() -def add(name, url, scope): +def add(name, url, scope, args={}): """Add a named mirror in the given scope""" mirrors = spack.config.get('mirrors', scope=scope) if not mirrors: @@ -463,7 +527,18 @@ def add(name, url, scope): tty.die("Mirror with name %s already exists." % name) items = [(n, u) for n, u in mirrors.items()] - items.insert(0, (name, url)) + mirror_data = url + key_values = ["s3_access_key_id", "s3_access_token", "s3_profile"] + # On creation, assume connection data is set for both + if any(value for value in key_values if value in args): + url_dict = {"url": url, + "access_pair": (args.s3_access_key_id, args.s3_access_key_secret), + "access_token": args.s3_access_token, + "profile": args.s3_profile, + "endpoint_url": args.s3_endpoint_url} + mirror_data = {"fetch": url_dict, "push": url_dict} + + items.insert(0, (name, mirror_data)) mirrors = syaml_dict(items) spack.config.set('mirrors', mirrors, scope=scope) diff --git a/lib/spack/spack/s3_handler.py b/lib/spack/spack/s3_handler.py index 8f9322716a..3841287946 100644 --- a/lib/spack/spack/s3_handler.py +++ b/lib/spack/spack/s3_handler.py @@ -41,7 +41,8 @@ def __getattr__(self, key): def _s3_open(url): parsed = url_util.parse(url) - s3 = s3_util.create_s3_session(parsed) + s3 = s3_util.create_s3_session(parsed, + connection=s3_util.get_mirror_connection(parsed)) # noqa: E501 bucket = parsed.netloc key = parsed.path diff --git a/lib/spack/spack/schema/mirrors.py b/lib/spack/spack/schema/mirrors.py index 6dec5aac97..5e66f9306c 100644 --- a/lib/spack/spack/schema/mirrors.py +++ b/lib/spack/spack/schema/mirrors.py @@ -24,8 +24,8 @@ 'type': 'object', 'required': ['fetch', 'push'], 'properties': { - 'fetch': {'type': 'string'}, - 'push': {'type': 'string'} + 'fetch': {'type': ['string', 'object']}, + 'push': {'type': ['string', 'object']} } } ] diff --git a/lib/spack/spack/stage.py b/lib/spack/spack/stage.py index bffd06ab73..ceff320d6e 100644 --- a/lib/spack/spack/stage.py +++ b/lib/spack/spack/stage.py @@ -437,11 +437,20 @@ def fetch(self, mirror_only=False, err_msg=None): # Join URLs of mirror roots with mirror paths. Because # urljoin() will strip everything past the final '/' in # the root, so we add a '/' if it is not present. - mirror_urls = [] + mirror_urls = {} for mirror in spack.mirror.MirrorCollection().values(): for rel_path in self.mirror_paths: - mirror_urls.append( - url_util.join(mirror.fetch_url, rel_path)) + mirror_url = url_util.join(mirror.fetch_url, rel_path) + mirror_urls[mirror_url] = {} + if mirror.get_access_pair("fetch") or \ + mirror.get_access_token("fetch") or \ + mirror.get_profile("fetch"): + mirror_urls[mirror_url] = { + "access_token": mirror.get_access_token("fetch"), + "access_pair": mirror.get_access_pair("fetch"), + "access_profile": mirror.get_profile("fetch"), + "endpoint_url": mirror.get_endpoint_url("fetch") + } # If this archive is normally fetched from a tarball URL, # then use the same digest. `spack mirror` ensures that @@ -460,10 +469,11 @@ def fetch(self, mirror_only=False, err_msg=None): # Add URL strategies for all the mirrors with the digest # Insert fetchers in the order that the URLs are provided. - for url in reversed(mirror_urls): + for url in reversed(list(mirror_urls.keys())): fetchers.insert( 0, fs.from_url_scheme( - url, digest, expand=expand, extension=extension)) + url, digest, expand=expand, extension=extension, + connection=mirror_urls[url])) if self.default_fetcher.cachable: for rel_path in reversed(list(self.mirror_paths)): diff --git a/lib/spack/spack/test/cmd/mirror.py b/lib/spack/spack/test/cmd/mirror.py index 8716d59e05..4c0238deb2 100644 --- a/lib/spack/spack/test/cmd/mirror.py +++ b/lib/spack/spack/test/cmd/mirror.py @@ -155,7 +155,7 @@ def test_mirror_crud(tmp_scope, capsys): # no-op output = mirror('set-url', '--scope', tmp_scope, 'mirror', 'http://spack.io') - assert 'Url already set' in output + assert 'No changes made' in output output = mirror('set-url', '--scope', tmp_scope, '--push', 'mirror', 's3://spack-public') @@ -164,7 +164,32 @@ def test_mirror_crud(tmp_scope, capsys): # no-op output = mirror('set-url', '--scope', tmp_scope, '--push', 'mirror', 's3://spack-public') - assert 'Url already set' in output + assert 'No changes made' in output + + output = mirror('remove', '--scope', tmp_scope, 'mirror') + assert 'Removed mirror' in output + + # Test S3 connection info token + mirror('add', '--scope', tmp_scope, + '--s3-access-token', 'aaaaaazzzzz', + 'mirror', 's3://spack-public') + + output = mirror('remove', '--scope', tmp_scope, 'mirror') + assert 'Removed mirror' in output + + # Test S3 connection info id/key + mirror('add', '--scope', tmp_scope, + '--s3-access-key-id', 'foo', '--s3-access-key-secret', 'bar', + 'mirror', 's3://spack-public') + + output = mirror('remove', '--scope', tmp_scope, 'mirror') + assert 'Removed mirror' in output + + # Test S3 connection info with endpoint URL + mirror('add', '--scope', tmp_scope, + '--s3-access-token', 'aaaaaazzzzz', + '--s3-endpoint-url', 'http://localhost/', + 'mirror', 's3://spack-public') output = mirror('remove', '--scope', tmp_scope, 'mirror') assert 'Removed mirror' in output diff --git a/lib/spack/spack/test/web.py b/lib/spack/spack/test/web.py index 2ccbf51225..b940fe6055 100644 --- a/lib/spack/spack/test/web.py +++ b/lib/spack/spack/test/web.py @@ -249,7 +249,7 @@ def get_object(self, Bucket=None, Key=None): def test_remove_s3_url(monkeypatch, capfd): fake_s3_url = 's3://my-bucket/subdirectory/mirror' - def mock_create_s3_session(url): + def mock_create_s3_session(url, connection={}): return MockS3Client() monkeypatch.setattr( @@ -269,7 +269,7 @@ def mock_create_s3_session(url): def test_s3_url_exists(monkeypatch, capfd): - def mock_create_s3_session(url): + def mock_create_s3_session(url, connection={}): return MockS3Client() monkeypatch.setattr( spack.util.s3, 'create_s3_session', mock_create_s3_session) diff --git a/lib/spack/spack/util/s3.py b/lib/spack/spack/util/s3.py index b9b56e0498..c120b4d5c5 100644 --- a/lib/spack/spack/util/s3.py +++ b/lib/spack/spack/util/s3.py @@ -11,6 +11,15 @@ import spack.util.url as url_util +def get_mirror_connection(url, url_type="push"): + connection = {} + # Try to find a mirror for potential connection information + for mirror in spack.mirror.MirrorCollection().values(): + if "%s://%s" % (url.scheme, url.netloc) == mirror.push_url: + connection = mirror.to_dict()[url_type] + return connection + + def _parse_s3_endpoint_url(endpoint_url): if not urllib_parse.urlparse(endpoint_url, scheme='').scheme: endpoint_url = '://'.join(('https', endpoint_url)) @@ -18,7 +27,7 @@ def _parse_s3_endpoint_url(endpoint_url): return endpoint_url -def create_s3_session(url): +def create_s3_session(url, connection={}): url = url_util.parse(url) if url.scheme != 's3': raise ValueError( @@ -31,14 +40,25 @@ def create_s3_session(url): from boto3 import Session from botocore.exceptions import ClientError - session = Session() + s3_connection = {} + if connection: + if connection['access_token']: + s3_connection["aws_session_token"] = connection["access_token"] + if connection["access_pair"][0]: + s3_connection["aws_access_key_id"] = connection["access_pair"][0] + s3_connection["aws_secret_access_key"] = connection["access_pair"][1] + if connection["profile"]: + s3_connection["profile_name"] = connection["profile"] + + session = Session(**s3_connection) s3_client_args = {"use_ssl": spack.config.get('config:verify_ssl')} endpoint_url = os.environ.get('S3_ENDPOINT_URL') if endpoint_url: s3_client_args['endpoint_url'] = _parse_s3_endpoint_url(endpoint_url) - + elif connection and 'endpoint_url' in connection: + s3_client_args["endpoint_url"] = _parse_s3_endpoint_url(connection["endpoint_url"]) # noqa: E501 # if no access credentials provided above, then access anonymously if not session.get_credentials(): from botocore import UNSIGNED diff --git a/lib/spack/spack/util/web.py b/lib/spack/spack/util/web.py index f1b01ae310..2db91b8080 100644 --- a/lib/spack/spack/util/web.py +++ b/lib/spack/spack/util/web.py @@ -193,7 +193,8 @@ def push_to_url( while remote_path.startswith('/'): remote_path = remote_path[1:] - s3 = s3_util.create_s3_session(remote_url) + s3 = s3_util.create_s3_session(remote_url, + connection=s3_util.get_mirror_connection(remote_url)) # noqa: E501 s3.upload_file(local_file_path, remote_url.netloc, remote_path, ExtraArgs=extra_args) @@ -219,7 +220,9 @@ def url_exists(url): return os.path.exists(local_path) if url.scheme == 's3': - s3 = s3_util.create_s3_session(url) + # Check for URL specific connection information + s3 = s3_util.create_s3_session(url, connection=s3_util.get_mirror_connection(url)) # noqa: E501 + try: s3.get_object(Bucket=url.netloc, Key=url.path.lstrip('/')) return True @@ -263,7 +266,8 @@ def remove_url(url, recursive=False): return if url.scheme == 's3': - s3 = s3_util.create_s3_session(url) + # Try to find a mirror for potential connection information + s3 = s3_util.create_s3_session(url, connection=s3_util.get_mirror_connection(url)) # noqa: E501 bucket = url.netloc if recursive: # Because list_objects_v2 can only return up to 1000 items diff --git a/share/spack/spack-completion.bash b/share/spack/spack-completion.bash index 2d1466ed50..b0d9d70f3f 100755 --- a/share/spack/spack-completion.bash +++ b/share/spack/spack-completion.bash @@ -1277,7 +1277,7 @@ _spack_mirror_destroy() { _spack_mirror_add() { if $list_options then - SPACK_COMPREPLY="-h --help --scope" + SPACK_COMPREPLY="-h --help --scope --s3-access-key-id --s3-access-key-secret --s3-access-token --s3-profile --s3-endpoint-url" else _mirrors fi @@ -1304,7 +1304,7 @@ _spack_mirror_rm() { _spack_mirror_set_url() { if $list_options then - SPACK_COMPREPLY="-h --help --push --scope" + SPACK_COMPREPLY="-h --help --push --scope --s3-access-key-id --s3-access-key-secret --s3-access-token --s3-profile --s3-endpoint-url" else _mirrors fi