Add connection specification to mirror creation (#24244)

* Add connection specification to mirror creation

This allows each mirror to contain information about the credentials
used to access it.

Update command and tests based on comments

Switch to only "long form" flags for the s3 connection information.
Use the "any" function instead of checking for an empty list when looking
for s3 connection information.

Split test to use the access token separately from the access id and key.
Use long flag form in test.

Add endpoint_url to available S3 options.

Extend the special parameters for an S3 mirror to accept the
endpoint_url parameter.

Add a test.

* Add connection information per URL not per mirror

Expand the mirror-based connection information to be per-URL.
This will allow a user to specify different S3 connection information
for both the fetch and the push URLs.

Add a parameter for "profile", another way of storing the id/secret pair.

* Switch from "access_profile" to "profile"
This commit is contained in:
Joseph Snyder 2021-11-19 15:28:34 -05:00 committed by GitHub
parent a5c50220db
commit d759612523
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 219 additions and 44 deletions

View file

@ -328,3 +328,22 @@ def reuse():
'--reuse', action='store_true', default=False,
help='reuse installed dependencies'
)
def add_s3_connection_args(subparser, add_help):
subparser.add_argument(
'--s3-access-key-id',
help="ID string to use to connect to this S3 mirror")
subparser.add_argument(
'--s3-access-key-secret',
help="Secret string to use to connect to this S3 mirror")
subparser.add_argument(
'--s3-access-token',
help="Access Token to use to connect to this S3 mirror")
subparser.add_argument(
'--s3-profile',
help="S3 profile name to use to connect to this S3 mirror",
default=None)
subparser.add_argument(
'--s3-endpoint-url',
help="Access Token to use to connect to this S3 mirror")

View file

@ -6,7 +6,6 @@
import sys
import llnl.util.tty as tty
from llnl.util.tty.colify import colify
import spack.cmd
import spack.cmd.common.arguments as arguments
@ -93,7 +92,7 @@ def setup_parser(subparser):
'--scope', choices=scopes, metavar=scopes_metavar,
default=spack.config.default_modify_scope(),
help="configuration scope to modify")
arguments.add_s3_connection_args(add_parser, False)
# Remove
remove_parser = sp.add_parser('remove', aliases=['rm'],
help=mirror_remove.__doc__)
@ -117,6 +116,7 @@ def setup_parser(subparser):
'--scope', choices=scopes, metavar=scopes_metavar,
default=spack.config.default_modify_scope(),
help="configuration scope to modify")
arguments.add_s3_connection_args(set_url_parser, False)
# List
list_parser = sp.add_parser('list', help=mirror_list.__doc__)
@ -129,7 +129,7 @@ def setup_parser(subparser):
def mirror_add(args):
"""Add a mirror to Spack."""
url = url_util.format(args.url)
spack.mirror.add(args.name, url, args.scope)
spack.mirror.add(args.name, url, args.scope, args)
def mirror_remove(args):
@ -140,7 +140,6 @@ def mirror_remove(args):
def mirror_set_url(args):
"""Change the URL of a mirror."""
url = url_util.format(args.url)
mirrors = spack.config.get('mirrors', scope=args.scope)
if not mirrors:
mirrors = syaml_dict()
@ -149,7 +148,15 @@ def mirror_set_url(args):
tty.die("No mirror found with name %s." % args.name)
entry = mirrors[args.name]
key_values = ["s3_access_key_id", "s3_access_token", "s3_profile"]
if any(value for value in key_values if value in args):
incoming_data = {"url": url,
"access_pair": (args.s3_access_key_id,
args.s3_access_key_secret),
"access_token": args.s3_access_token,
"profile": args.s3_profile,
"endpoint_url": args.s3_endpoint_url}
try:
fetch_url = entry['fetch']
push_url = entry['push']
@ -159,20 +166,28 @@ def mirror_set_url(args):
changes_made = False
if args.push:
changes_made = changes_made or push_url != url
push_url = url
if isinstance(push_url, dict):
changes_made = changes_made or push_url != incoming_data
push_url = incoming_data
else:
changes_made = changes_made or push_url != url
push_url = url
else:
changes_made = (
changes_made or fetch_url != push_url or push_url != url)
fetch_url, push_url = url, url
if isinstance(push_url, dict):
changes_made = (changes_made or push_url != incoming_data
or push_url != incoming_data)
fetch_url, push_url = incoming_data, incoming_data
else:
changes_made = changes_made or push_url != url
fetch_url, push_url = url, url
items = [
(
(n, u)
if n != args.name else (
(n, {"fetch": fetch_url, "push": push_url})
if fetch_url != push_url else (n, fetch_url)
if fetch_url != push_url else (n, {"fetch": fetch_url,
"push": fetch_url})
)
)
for n, u in mirrors.items()
@ -183,10 +198,10 @@ def mirror_set_url(args):
if changes_made:
tty.msg(
"Changed%s url for mirror %s." %
"Changed%s url or connection information for mirror %s." %
((" (push)" if args.push else ""), args.name))
else:
tty.msg("Url already set for mirror %s." % args.name)
tty.msg("No changes made to mirror %s." % args.name)
def mirror_list(args):
@ -330,7 +345,7 @@ def mirror_create(args):
" %-4d failed to fetch." % e)
if error:
tty.error("Failed downloads:")
colify(s.cformat("{name}{@version}") for s in error)
tty.colify(s.cformat("{name}{@version}") for s in error)
sys.exit(1)

View file

@ -1437,7 +1437,13 @@ def fetch(self):
basename = os.path.basename(parsed_url.path)
with working_dir(self.stage.path):
_, headers, stream = web_util.read_from_url(self.url)
import spack.util.s3 as s3_util
s3 = s3_util.create_s3_session(self.url,
connection=s3_util.get_mirror_connection(parsed_url), url_type="fetch") # noqa: E501
headers = s3.get_object(Bucket=parsed_url.netloc,
Key=parsed_url.path.lstrip("/"))
stream = headers["Body"]
with open(basename, 'wb') as f:
shutil.copyfileobj(stream, f)

View file

@ -41,6 +41,10 @@
from spack.version import VersionList
def _is_string(url):
return isinstance(url, six.string_types)
def _display_mirror_entry(size, name, url, type_=None):
if type_:
type_ = "".join((" (", type_, ")"))
@ -59,7 +63,8 @@ class Mirror(object):
to them. These two URLs are usually the same.
"""
def __init__(self, fetch_url, push_url=None, name=None):
def __init__(self, fetch_url, push_url=None,
name=None):
self._fetch_url = fetch_url
self._push_url = push_url
self._name = name
@ -96,7 +101,7 @@ def from_dict(d, name=None):
if isinstance(d, six.string_types):
return Mirror(d, name=name)
else:
return Mirror(d['fetch'], d['push'], name)
return Mirror(d['fetch'], d['push'], name=name)
def display(self, max_len=0):
if self._push_url is None:
@ -137,24 +142,83 @@ def __repr__(self):
def name(self):
return self._name or "<unnamed>"
def get_profile(self, url_type):
if isinstance(self._fetch_url, dict):
if url_type == "push":
return self._push_url['profile']
return self._fetch_url['profile']
else:
return None
def set_profile(self, url_type, profile):
if url_type == "push":
self._push_url["profile"] = profile
else:
self._fetch_url["profile"] = profile
def get_access_pair(self, url_type):
if isinstance(self._fetch_url, dict):
if url_type == "push":
return self._push_url['access_pair']
return self._fetch_url['access_pair']
else:
return None
def set_access_pair(self, url_type, connection_tuple):
if url_type == "push":
self._push_url["access_pair"] = connection_tuple
else:
self._fetch_url["access_pair"] = connection_tuple
def get_endpoint_url(self, url_type):
if isinstance(self._fetch_url, dict):
if url_type == "push":
return self._push_url['endpoint_url']
return self._fetch_url['endpoint_url']
else:
return None
def set_endpoint_url(self, url_type, url):
if url_type == "push":
self._push_url["endpoint_url"] = url
else:
self._fetch_url["endpoint_url"] = url
def get_access_token(self, url_type):
if isinstance(self._fetch_url, dict):
if url_type == "push":
return self._push_url['access_token']
return self._fetch_url['access_token']
else:
return None
def set_access_token(self, url_type, connection_token):
if url_type == "push":
self._push_url["access_token"] = connection_token
else:
self._fetch_url["access_token"] = connection_token
@property
def fetch_url(self):
return self._fetch_url
return self._fetch_url if _is_string(self._fetch_url) \
else self._fetch_url["url"]
@fetch_url.setter
def fetch_url(self, url):
self._fetch_url = url
self._fetch_url["url"] = url
self._normalize()
@property
def push_url(self):
if self._push_url is None:
return self._fetch_url
return self._push_url
return self._fetch_url if _is_string(self._fetch_url) \
else self._fetch_url["url"]
return self._push_url if _is_string(self._push_url) \
else self._push_url["url"]
@push_url.setter
def push_url(self, url):
self._push_url = url
self._push_url["url"] = url
self._normalize()
def _normalize(self):
@ -453,7 +517,7 @@ def create(path, specs, skip_unstable_versions=False):
return mirror_stats.stats()
def add(name, url, scope):
def add(name, url, scope, args={}):
"""Add a named mirror in the given scope"""
mirrors = spack.config.get('mirrors', scope=scope)
if not mirrors:
@ -463,7 +527,18 @@ def add(name, url, scope):
tty.die("Mirror with name %s already exists." % name)
items = [(n, u) for n, u in mirrors.items()]
items.insert(0, (name, url))
mirror_data = url
key_values = ["s3_access_key_id", "s3_access_token", "s3_profile"]
# On creation, assume connection data is set for both
if any(value for value in key_values if value in args):
url_dict = {"url": url,
"access_pair": (args.s3_access_key_id, args.s3_access_key_secret),
"access_token": args.s3_access_token,
"profile": args.s3_profile,
"endpoint_url": args.s3_endpoint_url}
mirror_data = {"fetch": url_dict, "push": url_dict}
items.insert(0, (name, mirror_data))
mirrors = syaml_dict(items)
spack.config.set('mirrors', mirrors, scope=scope)

View file

@ -41,7 +41,8 @@ def __getattr__(self, key):
def _s3_open(url):
parsed = url_util.parse(url)
s3 = s3_util.create_s3_session(parsed)
s3 = s3_util.create_s3_session(parsed,
connection=s3_util.get_mirror_connection(parsed)) # noqa: E501
bucket = parsed.netloc
key = parsed.path

View file

@ -24,8 +24,8 @@
'type': 'object',
'required': ['fetch', 'push'],
'properties': {
'fetch': {'type': 'string'},
'push': {'type': 'string'}
'fetch': {'type': ['string', 'object']},
'push': {'type': ['string', 'object']}
}
}
]

View file

@ -437,11 +437,20 @@ def fetch(self, mirror_only=False, err_msg=None):
# Join URLs of mirror roots with mirror paths. Because
# urljoin() will strip everything past the final '/' in
# the root, so we add a '/' if it is not present.
mirror_urls = []
mirror_urls = {}
for mirror in spack.mirror.MirrorCollection().values():
for rel_path in self.mirror_paths:
mirror_urls.append(
url_util.join(mirror.fetch_url, rel_path))
mirror_url = url_util.join(mirror.fetch_url, rel_path)
mirror_urls[mirror_url] = {}
if mirror.get_access_pair("fetch") or \
mirror.get_access_token("fetch") or \
mirror.get_profile("fetch"):
mirror_urls[mirror_url] = {
"access_token": mirror.get_access_token("fetch"),
"access_pair": mirror.get_access_pair("fetch"),
"access_profile": mirror.get_profile("fetch"),
"endpoint_url": mirror.get_endpoint_url("fetch")
}
# If this archive is normally fetched from a tarball URL,
# then use the same digest. `spack mirror` ensures that
@ -460,10 +469,11 @@ def fetch(self, mirror_only=False, err_msg=None):
# Add URL strategies for all the mirrors with the digest
# Insert fetchers in the order that the URLs are provided.
for url in reversed(mirror_urls):
for url in reversed(list(mirror_urls.keys())):
fetchers.insert(
0, fs.from_url_scheme(
url, digest, expand=expand, extension=extension))
url, digest, expand=expand, extension=extension,
connection=mirror_urls[url]))
if self.default_fetcher.cachable:
for rel_path in reversed(list(self.mirror_paths)):

View file

@ -155,7 +155,7 @@ def test_mirror_crud(tmp_scope, capsys):
# no-op
output = mirror('set-url', '--scope', tmp_scope,
'mirror', 'http://spack.io')
assert 'Url already set' in output
assert 'No changes made' in output
output = mirror('set-url', '--scope', tmp_scope,
'--push', 'mirror', 's3://spack-public')
@ -164,7 +164,32 @@ def test_mirror_crud(tmp_scope, capsys):
# no-op
output = mirror('set-url', '--scope', tmp_scope,
'--push', 'mirror', 's3://spack-public')
assert 'Url already set' in output
assert 'No changes made' in output
output = mirror('remove', '--scope', tmp_scope, 'mirror')
assert 'Removed mirror' in output
# Test S3 connection info token
mirror('add', '--scope', tmp_scope,
'--s3-access-token', 'aaaaaazzzzz',
'mirror', 's3://spack-public')
output = mirror('remove', '--scope', tmp_scope, 'mirror')
assert 'Removed mirror' in output
# Test S3 connection info id/key
mirror('add', '--scope', tmp_scope,
'--s3-access-key-id', 'foo', '--s3-access-key-secret', 'bar',
'mirror', 's3://spack-public')
output = mirror('remove', '--scope', tmp_scope, 'mirror')
assert 'Removed mirror' in output
# Test S3 connection info with endpoint URL
mirror('add', '--scope', tmp_scope,
'--s3-access-token', 'aaaaaazzzzz',
'--s3-endpoint-url', 'http://localhost/',
'mirror', 's3://spack-public')
output = mirror('remove', '--scope', tmp_scope, 'mirror')
assert 'Removed mirror' in output

View file

@ -249,7 +249,7 @@ def get_object(self, Bucket=None, Key=None):
def test_remove_s3_url(monkeypatch, capfd):
fake_s3_url = 's3://my-bucket/subdirectory/mirror'
def mock_create_s3_session(url):
def mock_create_s3_session(url, connection={}):
return MockS3Client()
monkeypatch.setattr(
@ -269,7 +269,7 @@ def mock_create_s3_session(url):
def test_s3_url_exists(monkeypatch, capfd):
def mock_create_s3_session(url):
def mock_create_s3_session(url, connection={}):
return MockS3Client()
monkeypatch.setattr(
spack.util.s3, 'create_s3_session', mock_create_s3_session)

View file

@ -11,6 +11,15 @@
import spack.util.url as url_util
def get_mirror_connection(url, url_type="push"):
connection = {}
# Try to find a mirror for potential connection information
for mirror in spack.mirror.MirrorCollection().values():
if "%s://%s" % (url.scheme, url.netloc) == mirror.push_url:
connection = mirror.to_dict()[url_type]
return connection
def _parse_s3_endpoint_url(endpoint_url):
if not urllib_parse.urlparse(endpoint_url, scheme='').scheme:
endpoint_url = '://'.join(('https', endpoint_url))
@ -18,7 +27,7 @@ def _parse_s3_endpoint_url(endpoint_url):
return endpoint_url
def create_s3_session(url):
def create_s3_session(url, connection={}):
url = url_util.parse(url)
if url.scheme != 's3':
raise ValueError(
@ -31,14 +40,25 @@ def create_s3_session(url):
from boto3 import Session
from botocore.exceptions import ClientError
session = Session()
s3_connection = {}
if connection:
if connection['access_token']:
s3_connection["aws_session_token"] = connection["access_token"]
if connection["access_pair"][0]:
s3_connection["aws_access_key_id"] = connection["access_pair"][0]
s3_connection["aws_secret_access_key"] = connection["access_pair"][1]
if connection["profile"]:
s3_connection["profile_name"] = connection["profile"]
session = Session(**s3_connection)
s3_client_args = {"use_ssl": spack.config.get('config:verify_ssl')}
endpoint_url = os.environ.get('S3_ENDPOINT_URL')
if endpoint_url:
s3_client_args['endpoint_url'] = _parse_s3_endpoint_url(endpoint_url)
elif connection and 'endpoint_url' in connection:
s3_client_args["endpoint_url"] = _parse_s3_endpoint_url(connection["endpoint_url"]) # noqa: E501
# if no access credentials provided above, then access anonymously
if not session.get_credentials():
from botocore import UNSIGNED

View file

@ -193,7 +193,8 @@ def push_to_url(
while remote_path.startswith('/'):
remote_path = remote_path[1:]
s3 = s3_util.create_s3_session(remote_url)
s3 = s3_util.create_s3_session(remote_url,
connection=s3_util.get_mirror_connection(remote_url)) # noqa: E501
s3.upload_file(local_file_path, remote_url.netloc,
remote_path, ExtraArgs=extra_args)
@ -219,7 +220,9 @@ def url_exists(url):
return os.path.exists(local_path)
if url.scheme == 's3':
s3 = s3_util.create_s3_session(url)
# Check for URL specific connection information
s3 = s3_util.create_s3_session(url, connection=s3_util.get_mirror_connection(url)) # noqa: E501
try:
s3.get_object(Bucket=url.netloc, Key=url.path.lstrip('/'))
return True
@ -263,7 +266,8 @@ def remove_url(url, recursive=False):
return
if url.scheme == 's3':
s3 = s3_util.create_s3_session(url)
# Try to find a mirror for potential connection information
s3 = s3_util.create_s3_session(url, connection=s3_util.get_mirror_connection(url)) # noqa: E501
bucket = url.netloc
if recursive:
# Because list_objects_v2 can only return up to 1000 items

View file

@ -1277,7 +1277,7 @@ _spack_mirror_destroy() {
_spack_mirror_add() {
if $list_options
then
SPACK_COMPREPLY="-h --help --scope"
SPACK_COMPREPLY="-h --help --scope --s3-access-key-id --s3-access-key-secret --s3-access-token --s3-profile --s3-endpoint-url"
else
_mirrors
fi
@ -1304,7 +1304,7 @@ _spack_mirror_rm() {
_spack_mirror_set_url() {
if $list_options
then
SPACK_COMPREPLY="-h --help --push --scope"
SPACK_COMPREPLY="-h --help --push --scope --s3-access-key-id --s3-access-key-secret --s3-access-token --s3-profile --s3-endpoint-url"
else
_mirrors
fi