Environments: Add support for include URLs (#29026)

* Preliminary support for include URLs in spack.yaml (environment) files

This commit adds support in environments for external configuration files obtained from a URL with a preference for grabbing raw text from GitHub and gitlab for efficient downloads of the relevant files. The URL can also be a link to a directory that contains multiple configuration files.

Remote configuration files are retrieved and cached for the environment. Configuration files with the same name will not be overwritten once cached.
This commit is contained in:
Tamara Dahlgren 2022-08-30 11:13:23 -07:00 committed by GitHub
parent b35a0b1b40
commit 3894ceebc9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
22 changed files with 922 additions and 224 deletions

View file

@ -201,6 +201,7 @@ def setup(sphinx):
("py:class", "unittest.case.TestCase"), ("py:class", "unittest.case.TestCase"),
("py:class", "_frozen_importlib_external.SourceFileLoader"), ("py:class", "_frozen_importlib_external.SourceFileLoader"),
("py:class", "clingo.Control"), ("py:class", "clingo.Control"),
("py:class", "six.moves.urllib.parse.ParseResult"),
# Spack classes that are private and we don't want to expose # Spack classes that are private and we don't want to expose
("py:class", "spack.provider_index._IndexBase"), ("py:class", "spack.provider_index._IndexBase"),
("py:class", "spack.repo._PrependFileLoader"), ("py:class", "spack.repo._PrependFileLoader"),

View file

@ -478,14 +478,21 @@ them to the Environment.
spack: spack:
include: include:
- relative/path/to/config.yaml - relative/path/to/config.yaml
- https://github.com/path/to/raw/config/compilers.yaml
- /absolute/path/to/packages.yaml - /absolute/path/to/packages.yaml
Environments can include files with either relative or absolute Environments can include files or URLs. File paths can be relative or
paths. Inline configurations take precedence over included absolute. URLs include the path to the text for individual files or
configurations, so you don't have to change shared configuration files can be the path to a directory containing configuration files.
to make small changes to an individual Environment. Included configs
listed earlier will have higher precedence, as the included configs are ^^^^^^^^^^^^^^^^^^^^^^^^
applied in reverse order. Configuration precedence
^^^^^^^^^^^^^^^^^^^^^^^^
Inline configurations take precedence over included configurations, so
you don't have to change shared configuration files to make small changes
to an individual environment. Included configurations listed earlier will
have higher precedence, as the included configs are applied in reverse order.
------------------------------- -------------------------------
Manually Editing the Specs List Manually Editing the Specs List

View file

@ -26,7 +26,6 @@
import spack.cmd import spack.cmd
import spack.config as config import spack.config as config
import spack.database as spack_db import spack.database as spack_db
import spack.fetch_strategy as fs
import spack.hooks import spack.hooks
import spack.hooks.sbang import spack.hooks.sbang
import spack.mirror import spack.mirror
@ -1231,7 +1230,7 @@ def try_fetch(url_to_fetch):
try: try:
stage.fetch() stage.fetch()
except fs.FetchError: except web_util.FetchError:
stage.destroy() stage.destroy()
return None return None
@ -1954,7 +1953,7 @@ def get_keys(install=False, trust=False, force=False, mirrors=None):
if not os.path.exists(stage.save_filename): if not os.path.exists(stage.save_filename):
try: try:
stage.fetch() stage.fetch()
except fs.FetchError: except web_util.FetchError:
continue continue
tty.debug("Found key {0}".format(fingerprint)) tty.debug("Found key {0}".format(fingerprint))
@ -2106,7 +2105,7 @@ def _download_buildcache_entry(mirror_root, descriptions):
try: try:
stage.fetch() stage.fetch()
break break
except fs.FetchError as e: except web_util.FetchError as e:
tty.debug(e) tty.debug(e)
else: else:
if fail_if_missing: if fail_if_missing:

View file

@ -15,7 +15,6 @@
import spack.cmd.common.arguments as arguments import spack.cmd.common.arguments as arguments
import spack.config import spack.config
import spack.environment as ev import spack.environment as ev
import spack.fetch_strategy as fs
import spack.hash_types as ht import spack.hash_types as ht
import spack.mirror import spack.mirror
import spack.relocate import spack.relocate
@ -711,7 +710,7 @@ def sync_fn(args):
stage.create() stage.create()
stage.fetch() stage.fetch()
web_util.push_to_url(local_path, dest_url, keep_original=True) web_util.push_to_url(local_path, dest_url, keep_original=True)
except fs.FetchError as e: except web_util.FetchError as e:
tty.debug("spack buildcache unable to sync {0}".format(rel_path)) tty.debug("spack buildcache unable to sync {0}".format(rel_path))
tty.debug(e) tty.debug(e)
finally: finally:

View file

@ -64,6 +64,7 @@
# Hacked yaml for configuration files preserves line numbers. # Hacked yaml for configuration files preserves line numbers.
import spack.util.spack_yaml as syaml import spack.util.spack_yaml as syaml
import spack.util.web as web_util
from spack.error import SpackError from spack.error import SpackError
from spack.util.cpus import cpus_available from spack.util.cpus import cpus_available
@ -994,18 +995,19 @@ def read_config_file(filename, schema=None):
# schema when it's not necessary) while allowing us to validate against a # schema when it's not necessary) while allowing us to validate against a
# known schema when the top-level key could be incorrect. # known schema when the top-level key could be incorrect.
# Ignore nonexisting files.
if not os.path.exists(filename): if not os.path.exists(filename):
# Ignore nonexistent files.
tty.debug("Skipping nonexistent config path {0}".format(filename))
return None return None
elif not os.path.isfile(filename): elif not os.path.isfile(filename):
raise ConfigFileError("Invalid configuration. %s exists but is not a file." % filename) raise ConfigFileError("Invalid configuration. %s exists but is not a file." % filename)
elif not os.access(filename, os.R_OK): elif not os.access(filename, os.R_OK):
raise ConfigFileError("Config file is not readable: %s" % filename) raise ConfigFileError("Config file is not readable: {0}".format(filename))
try: try:
tty.debug("Reading config file %s" % filename) tty.debug("Reading config from file {0}".format(filename))
with open(filename) as f: with open(filename) as f:
data = syaml.load_config(f) data = syaml.load_config(f)
@ -1304,6 +1306,95 @@ def _config_from(scopes_or_paths):
return configuration return configuration
def raw_github_gitlab_url(url):
"""Transform a github URL to the raw form to avoid undesirable html.
Args:
url: url to be converted to raw form
Returns: (str) raw github/gitlab url or the original url
"""
# Note we rely on GitHub to redirect the 'raw' URL returned here to the
# actual URL under https://raw.githubusercontent.com/ with '/blob'
# removed and or, '/blame' if needed.
if "github" in url or "gitlab" in url:
return url.replace("/blob/", "/raw/")
return url
def collect_urls(base_url):
"""Return a list of configuration URLs.
Arguments:
base_url (str): URL for a configuration (yaml) file or a directory
containing yaml file(s)
Returns: (list) list of configuration file(s) or empty list if none
"""
if not base_url:
return []
extension = ".yaml"
if base_url.endswith(extension):
return [base_url]
# Collect configuration URLs if the base_url is a "directory".
_, links = web_util.spider(base_url, 0)
return [link for link in links if link.endswith(extension)]
def fetch_remote_configs(url, dest_dir, skip_existing=True):
"""Retrieve configuration file(s) at the specified URL.
Arguments:
url (str): URL for a configuration (yaml) file or a directory containing
yaml file(s)
dest_dir (str): destination directory
skip_existing (bool): Skip files that already exist in dest_dir if
``True``; otherwise, replace those files
Returns: (str) path to the corresponding file if URL is or contains a
single file and it is the only file in the destination directory or
the root (dest_dir) directory if multiple configuration files exist
or are retrieved.
"""
def _fetch_file(url):
raw = raw_github_gitlab_url(url)
tty.debug("Reading config from url {0}".format(raw))
return web_util.fetch_url_text(raw, dest_dir=dest_dir)
if not url:
raise ConfigFileError("Cannot retrieve configuration without a URL")
# Return the local path to the cached configuration file OR to the
# directory containing the cached configuration files.
config_links = collect_urls(url)
existing_files = os.listdir(dest_dir) if os.path.isdir(dest_dir) else []
paths = []
for config_url in config_links:
basename = os.path.basename(config_url)
if skip_existing and basename in existing_files:
tty.warn(
"Will not fetch configuration from {0} since a version already"
"exists in {1}".format(config_url, dest_dir)
)
path = os.path.join(dest_dir, basename)
else:
path = _fetch_file(config_url)
if path:
paths.append(path)
if paths:
return dest_dir if len(paths) > 1 else paths[0]
raise ConfigFileError("Cannot retrieve configuration (yaml) from {0}".format(url))
class ConfigError(SpackError): class ConfigError(SpackError):
"""Superclass for all Spack config related errors.""" """Superclass for all Spack config related errors."""

View file

@ -17,7 +17,6 @@
import llnl.util.filesystem as fs import llnl.util.filesystem as fs
import llnl.util.tty as tty import llnl.util.tty as tty
from llnl.util.filesystem import rename
from llnl.util.lang import dedupe from llnl.util.lang import dedupe
from llnl.util.symlink import symlink from llnl.util.symlink import symlink
@ -593,7 +592,7 @@ def regenerate(self, concretized_root_specs):
symlink(new_root, tmp_symlink_name) symlink(new_root, tmp_symlink_name)
# mv symlink atomically over root symlink to old_root # mv symlink atomically over root symlink to old_root
rename(tmp_symlink_name, self.root) fs.rename(tmp_symlink_name, self.root)
except Exception as e: except Exception as e:
# Clean up new view and temporary symlink on any failure. # Clean up new view and temporary symlink on any failure.
try: try:
@ -897,6 +896,11 @@ def repos_path(self):
def log_path(self): def log_path(self):
return os.path.join(self.path, env_subdir_name, "logs") return os.path.join(self.path, env_subdir_name, "logs")
@property
def config_stage_dir(self):
"""Directory for any staged configuration file(s)."""
return os.path.join(self.env_subdir_path, "config")
@property @property
def view_path_default(self): def view_path_default(self):
# default path for environment views # default path for environment views
@ -928,6 +932,47 @@ def included_config_scopes(self):
# allow paths to contain spack config/environment variables, etc. # allow paths to contain spack config/environment variables, etc.
config_path = substitute_path_variables(config_path) config_path = substitute_path_variables(config_path)
# strip file URL prefix, if needed, to avoid unnecessary remote
# config processing for local files
config_path = config_path.replace("file://", "")
if not os.path.exists(config_path):
# Stage any remote configuration file(s)
if spack.util.url.is_url_format(config_path):
staged_configs = (
os.listdir(self.config_stage_dir)
if os.path.exists(self.config_stage_dir)
else []
)
basename = os.path.basename(config_path)
if basename in staged_configs:
# Do NOT re-stage configuration files over existing
# ones with the same name since there is a risk of
# losing changes (e.g., from 'spack config update').
tty.warn(
"Will not re-stage configuration from {0} to avoid "
"losing changes to the already staged file of the "
"same name.".format(config_path)
)
# Recognize the configuration stage directory
# is flattened to ensure a single copy of each
# configuration file.
config_path = self.config_stage_dir
if basename.endswith(".yaml"):
config_path = os.path.join(config_path, basename)
else:
staged_path = spack.config.fetch_remote_configs(
config_path,
self.config_stage_dir,
skip_existing=True,
)
if not staged_path:
raise SpackEnvironmentError(
"Unable to fetch remote configuration {0}".format(config_path)
)
config_path = staged_path
# treat relative paths as relative to the environment # treat relative paths as relative to the environment
if not os.path.isabs(config_path): if not os.path.isabs(config_path):
config_path = os.path.join(self.path, config_path) config_path = os.path.join(self.path, config_path)
@ -936,10 +981,14 @@ def included_config_scopes(self):
if os.path.isdir(config_path): if os.path.isdir(config_path):
# directories are treated as regular ConfigScopes # directories are treated as regular ConfigScopes
config_name = "env:%s:%s" % (self.name, os.path.basename(config_path)) config_name = "env:%s:%s" % (self.name, os.path.basename(config_path))
tty.debug("Creating ConfigScope {0} for '{1}'".format(config_name, config_path))
scope = spack.config.ConfigScope(config_name, config_path) scope = spack.config.ConfigScope(config_name, config_path)
elif os.path.exists(config_path): elif os.path.exists(config_path):
# files are assumed to be SingleFileScopes # files are assumed to be SingleFileScopes
config_name = "env:%s:%s" % (self.name, config_path) config_name = "env:%s:%s" % (self.name, config_path)
tty.debug(
"Creating SingleFileScope {0} for '{1}'".format(config_name, config_path)
)
scope = spack.config.SingleFileScope( scope = spack.config.SingleFileScope(
config_name, config_path, spack.schema.merged.schema config_name, config_path, spack.schema.merged.schema
) )

View file

@ -52,7 +52,7 @@
import spack.util.crypto as crypto import spack.util.crypto as crypto
import spack.util.pattern as pattern import spack.util.pattern as pattern
import spack.util.url as url_util import spack.util.url as url_util
import spack.util.web import spack.util.web as web_util
import spack.version import spack.version
from spack.util.compression import decompressor_for, extension from spack.util.compression import decompressor_for, extension
from spack.util.executable import CommandNotFoundError, which from spack.util.executable import CommandNotFoundError, which
@ -337,7 +337,7 @@ def fetch(self):
url = None url = None
errors = [] errors = []
for url in self.candidate_urls: for url in self.candidate_urls:
if not self._existing_url(url): if not web_util.url_exists(url, self.curl):
continue continue
try: try:
@ -352,30 +352,6 @@ def fetch(self):
if not self.archive_file: if not self.archive_file:
raise FailedDownloadError(url) raise FailedDownloadError(url)
def _existing_url(self, url):
tty.debug("Checking existence of {0}".format(url))
if spack.config.get("config:url_fetch_method") == "curl":
curl = self.curl
# Telling curl to fetch the first byte (-r 0-0) is supposed to be
# portable.
curl_args = ["--stderr", "-", "-s", "-f", "-r", "0-0", url]
if not spack.config.get("config:verify_ssl"):
curl_args.append("-k")
_ = curl(*curl_args, fail_on_error=False, output=os.devnull)
return curl.returncode == 0
else:
# Telling urllib to check if url is accessible
try:
url, headers, response = spack.util.web.read_from_url(url)
except spack.util.web.SpackWebError as werr:
msg = "Urllib fetch failed to verify url\
{0}\n with error {1}".format(
url, werr
)
raise FailedDownloadError(url, msg)
return response.getcode() is None or response.getcode() == 200
def _fetch_from_url(self, url): def _fetch_from_url(self, url):
if spack.config.get("config:url_fetch_method") == "curl": if spack.config.get("config:url_fetch_method") == "curl":
return self._fetch_curl(url) return self._fetch_curl(url)
@ -397,8 +373,8 @@ def _fetch_urllib(self, url):
# Run urllib but grab the mime type from the http headers # Run urllib but grab the mime type from the http headers
try: try:
url, headers, response = spack.util.web.read_from_url(url) url, headers, response = web_util.read_from_url(url)
except spack.util.web.SpackWebError as e: except web_util.SpackWebError as e:
# clean up archive on failure. # clean up archive on failure.
if self.archive_file: if self.archive_file:
os.remove(self.archive_file) os.remove(self.archive_file)
@ -433,38 +409,19 @@ def _fetch_curl(self, url):
else: else:
save_args = ["-O"] save_args = ["-O"]
curl_args = save_args + [ timeout = 0
"-f", # fail on >400 errors cookie_args = []
"-D",
"-", # print out HTML headers
"-L", # resolve 3xx redirects
url,
]
if not spack.config.get("config:verify_ssl"):
curl_args.append("-k")
if sys.stdout.isatty() and tty.msg_enabled():
curl_args.append("-#") # status bar when using a tty
else:
curl_args.append("-sS") # show errors if fail
connect_timeout = spack.config.get("config:connect_timeout", 10)
if self.extra_options: if self.extra_options:
cookie = self.extra_options.get("cookie") cookie = self.extra_options.get("cookie")
if cookie: if cookie:
curl_args.append("-j") # junk cookies cookie_args.append("-j") # junk cookies
curl_args.append("-b") # specify cookie cookie_args.append("-b") # specify cookie
curl_args.append(cookie) cookie_args.append(cookie)
timeout = self.extra_options.get("timeout") timeout = self.extra_options.get("timeout")
if timeout:
connect_timeout = max(connect_timeout, int(timeout))
if connect_timeout > 0: base_args = web_util.base_curl_fetch_args(url, timeout)
# Timeout if can't establish a connection after n sec. curl_args = save_args + base_args + cookie_args
curl_args.extend(["--connect-timeout", str(connect_timeout)])
# Run curl but grab the mime type from the http headers # Run curl but grab the mime type from the http headers
curl = self.curl curl = self.curl
@ -479,26 +436,10 @@ def _fetch_curl(self, url):
if partial_file and os.path.lexists(partial_file): if partial_file and os.path.lexists(partial_file):
os.remove(partial_file) os.remove(partial_file)
if curl.returncode == 22: try:
# This is a 404. Curl will print the error. web_util.check_curl_code(curl.returncode)
raise FailedDownloadError(url, "URL %s was not found!" % url) except web_util.FetchError as err:
raise spack.fetch_strategy.FailedDownloadError(url, str(err))
elif curl.returncode == 60:
# This is a certificate error. Suggest spack -k
raise FailedDownloadError(
url,
"Curl was unable to fetch due to invalid certificate. "
"This is either an attack, or your cluster's SSL "
"configuration is bad. If you believe your SSL "
"configuration is bad, you can try running spack -k, "
"which will not check SSL certificates."
"Use this at your own risk.",
)
else:
# This is some other curl error. Curl will print the
# error, but print a spack message too
raise FailedDownloadError(url, "Curl failed with error %d" % curl.returncode)
self._check_headers(headers) self._check_headers(headers)
@ -556,7 +497,7 @@ def archive(self, destination):
if not self.archive_file: if not self.archive_file:
raise NoArchiveFileError("Cannot call archive() before fetching.") raise NoArchiveFileError("Cannot call archive() before fetching.")
spack.util.web.push_to_url(self.archive_file, destination, keep_original=True) web_util.push_to_url(self.archive_file, destination, keep_original=True)
@_needs_stage @_needs_stage
def check(self): def check(self):
@ -1385,19 +1326,19 @@ def fetch(self):
parsed_url = url_util.parse(self.url) parsed_url = url_util.parse(self.url)
if parsed_url.scheme != "s3": if parsed_url.scheme != "s3":
raise FetchError("S3FetchStrategy can only fetch from s3:// urls.") raise web_util.FetchError("S3FetchStrategy can only fetch from s3:// urls.")
tty.debug("Fetching {0}".format(self.url)) tty.debug("Fetching {0}".format(self.url))
basename = os.path.basename(parsed_url.path) basename = os.path.basename(parsed_url.path)
with working_dir(self.stage.path): with working_dir(self.stage.path):
_, headers, stream = spack.util.web.read_from_url(self.url) _, headers, stream = web_util.read_from_url(self.url)
with open(basename, "wb") as f: with open(basename, "wb") as f:
shutil.copyfileobj(stream, f) shutil.copyfileobj(stream, f)
content_type = spack.util.web.get_header(headers, "Content-type") content_type = web_util.get_header(headers, "Content-type")
if content_type == "text/html": if content_type == "text/html":
warn_content_type_mismatch(self.archive_file or "the archive") warn_content_type_mismatch(self.archive_file or "the archive")
@ -1426,15 +1367,13 @@ def __init__(self, *args, **kwargs):
@_needs_stage @_needs_stage
def fetch(self): def fetch(self):
import spack.util.web as web_util
if self.archive_file: if self.archive_file:
tty.debug("Already downloaded {0}".format(self.archive_file)) tty.debug("Already downloaded {0}".format(self.archive_file))
return return
parsed_url = url_util.parse(self.url) parsed_url = url_util.parse(self.url)
if parsed_url.scheme != "gs": if parsed_url.scheme != "gs":
raise FetchError("GCSFetchStrategy can only fetch from gs:// urls.") raise web_util.FetchError("GCSFetchStrategy can only fetch from gs:// urls.")
tty.debug("Fetching {0}".format(self.url)) tty.debug("Fetching {0}".format(self.url))
@ -1489,7 +1428,7 @@ def from_kwargs(**kwargs):
on attribute names (e.g., ``git``, ``hg``, etc.) on attribute names (e.g., ``git``, ``hg``, etc.)
Raises: Raises:
FetchError: If no ``fetch_strategy`` matches the args. spack.util.web.FetchError: If no ``fetch_strategy`` matches the args.
""" """
for fetcher in all_strategies: for fetcher in all_strategies:
if fetcher.matches(kwargs): if fetcher.matches(kwargs):
@ -1586,7 +1525,7 @@ def for_package_version(pkg, version):
# if it's a commit, we must use a GitFetchStrategy # if it's a commit, we must use a GitFetchStrategy
if isinstance(version, spack.version.GitVersion): if isinstance(version, spack.version.GitVersion):
if not hasattr(pkg, "git"): if not hasattr(pkg, "git"):
raise FetchError( raise web_util.FetchError(
"Cannot fetch git version for %s. Package has no 'git' attribute" % pkg.name "Cannot fetch git version for %s. Package has no 'git' attribute" % pkg.name
) )
# Populate the version with comparisons to other commits # Populate the version with comparisons to other commits
@ -1731,15 +1670,11 @@ def destroy(self):
shutil.rmtree(self.root, ignore_errors=True) shutil.rmtree(self.root, ignore_errors=True)
class FetchError(spack.error.SpackError): class NoCacheError(web_util.FetchError):
"""Superclass for fetcher errors."""
class NoCacheError(FetchError):
"""Raised when there is no cached archive for a package.""" """Raised when there is no cached archive for a package."""
class FailedDownloadError(FetchError): class FailedDownloadError(web_util.FetchError):
"""Raised when a download fails.""" """Raised when a download fails."""
def __init__(self, url, msg=""): def __init__(self, url, msg=""):
@ -1747,23 +1682,23 @@ def __init__(self, url, msg=""):
self.url = url self.url = url
class NoArchiveFileError(FetchError): class NoArchiveFileError(web_util.FetchError):
""" "Raised when an archive file is expected but none exists.""" """Raised when an archive file is expected but none exists."""
class NoDigestError(FetchError): class NoDigestError(web_util.FetchError):
"""Raised after attempt to checksum when URL has no digest.""" """Raised after attempt to checksum when URL has no digest."""
class ExtrapolationError(FetchError): class ExtrapolationError(web_util.FetchError):
"""Raised when we can't extrapolate a version for a package.""" """Raised when we can't extrapolate a version for a package."""
class FetcherConflict(FetchError): class FetcherConflict(web_util.FetchError):
"""Raised for packages with invalid fetch attributes.""" """Raised for packages with invalid fetch attributes."""
class InvalidArgsError(FetchError): class InvalidArgsError(web_util.FetchError):
"""Raised when a version can't be deduced from a set of arguments.""" """Raised when a version can't be deduced from a set of arguments."""
def __init__(self, pkg=None, version=None, **args): def __init__(self, pkg=None, version=None, **args):
@ -1776,11 +1711,11 @@ def __init__(self, pkg=None, version=None, **args):
super(InvalidArgsError, self).__init__(msg, long_msg) super(InvalidArgsError, self).__init__(msg, long_msg)
class ChecksumError(FetchError): class ChecksumError(web_util.FetchError):
"""Raised when archive fails to checksum.""" """Raised when archive fails to checksum."""
class NoStageError(FetchError): class NoStageError(web_util.FetchError):
"""Raised when fetch operations are called before set_stage().""" """Raised when fetch operations are called before set_stage()."""
def __init__(self, method): def __init__(self, method):

View file

@ -63,6 +63,7 @@
from spack.util.executable import ProcessError, which from spack.util.executable import ProcessError, which
from spack.util.package_hash import package_hash from spack.util.package_hash import package_hash
from spack.util.prefix import Prefix from spack.util.prefix import Prefix
from spack.util.web import FetchError
from spack.version import GitVersion, Version, VersionBase from spack.version import GitVersion, Version, VersionBase
if sys.version_info[0] >= 3: if sys.version_info[0] >= 3:
@ -3022,13 +3023,6 @@ def possible_dependencies(*pkg_or_spec, **kwargs):
return visited return visited
class FetchError(spack.error.SpackError):
"""Raised when something goes wrong during fetch."""
def __init__(self, message, long_msg=None):
super(FetchError, self).__init__(message, long_msg)
class PackageStillNeededError(InstallError): class PackageStillNeededError(InstallError):
"""Raised when package is still needed by another on uninstall.""" """Raised when package is still needed by another on uninstall."""

View file

@ -42,6 +42,7 @@
import spack.util.pattern as pattern import spack.util.pattern as pattern
import spack.util.url as url_util import spack.util.url as url_util
from spack.util.crypto import bit_length, prefix_bits from spack.util.crypto import bit_length, prefix_bits
from spack.util.web import FetchError
# The well-known stage source subdirectory name. # The well-known stage source subdirectory name.
_source_path_subdir = "spack-src" _source_path_subdir = "spack-src"
@ -529,7 +530,7 @@ def print_errors(errors):
self.fetcher = self.default_fetcher self.fetcher = self.default_fetcher
default_msg = "All fetchers failed for {0}".format(self.name) default_msg = "All fetchers failed for {0}".format(self.name)
raise fs.FetchError(err_msg or default_msg, None) raise FetchError(err_msg or default_msg, None)
print_errors(errors) print_errors(errors)

View file

@ -134,7 +134,7 @@ def test_config_yaml_is_preserved_during_bootstrap(mutable_config):
@pytest.mark.regression("26548") @pytest.mark.regression("26548")
def test_custom_store_in_environment(mutable_config, tmpdir): def test_bootstrap_custom_store_in_environment(mutable_config, tmpdir):
# Test that the custom store in an environment is taken into account # Test that the custom store in an environment is taken into account
# during bootstrapping # during bootstrapping
spack_yaml = tmpdir.join("spack.yaml") spack_yaml = tmpdir.join("spack.yaml")

View file

@ -29,6 +29,7 @@
from spack.util.executable import Executable from spack.util.executable import Executable
from spack.util.mock_package import MockPackageMultiRepo from spack.util.mock_package import MockPackageMultiRepo
from spack.util.path import substitute_path_variables from spack.util.path import substitute_path_variables
from spack.util.web import FetchError
from spack.version import Version from spack.version import Version
# TODO-27021 # TODO-27021
@ -656,7 +657,7 @@ def test_env_with_config():
assert any(x.satisfies("mpileaks@2.2") for x in e._get_environment_specs()) assert any(x.satisfies("mpileaks@2.2") for x in e._get_environment_specs())
def test_with_config_bad_include(capfd): def test_with_config_bad_include():
env_name = "test_bad_include" env_name = "test_bad_include"
test_config = """\ test_config = """\
spack: spack:
@ -667,15 +668,14 @@ def test_with_config_bad_include(capfd):
_env_create(env_name, StringIO(test_config)) _env_create(env_name, StringIO(test_config))
e = ev.read(env_name) e = ev.read(env_name)
with pytest.raises(SystemExit): with pytest.raises(spack.config.ConfigFileError) as exc:
with e: with e:
e.concretize() e.concretize()
out, err = capfd.readouterr() err = str(exc)
assert "not retrieve configuration" in err
assert os.path.join("no", "such", "directory") in err
assert "missing include" in err
assert "/no/such/directory" in err
assert os.path.join("no", "such", "file.yaml") in err
assert ev.active_environment() is None assert ev.active_environment() is None
@ -721,25 +721,44 @@ def test_env_with_include_config_files_same_basename():
assert environment_specs[1].satisfies("mpileaks@2.2") assert environment_specs[1].satisfies("mpileaks@2.2")
def test_env_with_included_config_file(): @pytest.fixture(scope="function")
test_config = """\ def packages_file(tmpdir):
env: """Return the path to the packages configuration file."""
include: raw_yaml = """
- ./included-config.yaml
specs:
- mpileaks
"""
_env_create("test", StringIO(test_config))
e = ev.read("test")
with open(os.path.join(e.path, "included-config.yaml"), "w") as f:
f.write(
"""\
packages: packages:
mpileaks: mpileaks:
version: [2.2] version: [2.2]
""" """
) filename = tmpdir.ensure("testconfig", "packages.yaml")
filename.write(raw_yaml)
yield filename
def mpileaks_env_config(include_path):
"""Return the contents of an environment that includes the provided
path and lists mpileaks as the sole spec."""
return """\
env:
include:
- {0}
specs:
- mpileaks
""".format(
include_path
)
def test_env_with_included_config_file(packages_file):
"""Test inclusion of a relative packages configuration file added to an
existing environment."""
include_filename = "included-config.yaml"
test_config = mpileaks_env_config(os.path.join(".", include_filename))
_env_create("test", StringIO(test_config))
e = ev.read("test")
included_path = os.path.join(e.path, include_filename)
fs.rename(packages_file.strpath, included_path)
with e: with e:
e.concretize() e.concretize()
@ -747,65 +766,81 @@ def test_env_with_included_config_file():
assert any(x.satisfies("mpileaks@2.2") for x in e._get_environment_specs()) assert any(x.satisfies("mpileaks@2.2") for x in e._get_environment_specs())
def test_env_with_included_config_scope(): def test_env_with_included_config_file_url(tmpdir, mutable_empty_config, packages_file):
"""Test configuration inclusion of a file whose path is a URL before
the environment is concretized."""
spack_yaml = tmpdir.join("spack.yaml")
with spack_yaml.open("w") as f:
f.write("spack:\n include:\n - file://{0}\n".format(packages_file))
env = ev.Environment(tmpdir.strpath)
ev.activate(env)
scopes = env.included_config_scopes()
assert len(scopes) == 1
cfg = spack.config.get("packages")
assert cfg["mpileaks"]["version"] == [2.2]
def test_env_with_included_config_missing_file(tmpdir, mutable_empty_config):
"""Test inclusion of a missing configuration file raises FetchError
noting missing file."""
spack_yaml = tmpdir.join("spack.yaml")
missing_file = tmpdir.join("packages.yaml")
with spack_yaml.open("w") as f:
f.write("spack:\n include:\n - {0}\n".format(missing_file.strpath))
env = ev.Environment(tmpdir.strpath)
with pytest.raises(FetchError, match="No such file or directory"):
ev.activate(env)
def test_env_with_included_config_scope(tmpdir, packages_file):
"""Test inclusion of a package file from the environment's configuration
stage directory. This test is intended to represent a case where a remote
file has already been staged."""
config_scope_path = os.path.join(ev.root("test"), "config") config_scope_path = os.path.join(ev.root("test"), "config")
test_config = (
"""\
env:
include:
- %s
specs:
- mpileaks
"""
% config_scope_path
)
# Configure the environment to include file(s) from the environment's
# remote configuration stage directory.
test_config = mpileaks_env_config(config_scope_path)
# Create the environment
_env_create("test", StringIO(test_config)) _env_create("test", StringIO(test_config))
e = ev.read("test") e = ev.read("test")
# Copy the packages.yaml file to the environment configuration
# directory so it is picked up during concretization. (Using
# copy instead of rename in case the fixture scope changes.)
fs.mkdirp(config_scope_path) fs.mkdirp(config_scope_path)
with open(os.path.join(config_scope_path, "packages.yaml"), "w") as f: include_filename = os.path.basename(packages_file.strpath)
f.write( included_path = os.path.join(config_scope_path, include_filename)
"""\ fs.copy(packages_file.strpath, included_path)
packages:
mpileaks:
version: [2.2]
"""
)
# Ensure the concretized environment reflects contents of the
# packages.yaml file.
with e: with e:
e.concretize() e.concretize()
assert any(x.satisfies("mpileaks@2.2") for x in e._get_environment_specs()) assert any(x.satisfies("mpileaks@2.2") for x in e._get_environment_specs())
def test_env_with_included_config_var_path(): def test_env_with_included_config_var_path(packages_file):
"""Test inclusion of a package configuration file with path variables
"staged" in the environment's configuration stage directory."""
config_var_path = os.path.join("$tempdir", "included-config.yaml") config_var_path = os.path.join("$tempdir", "included-config.yaml")
test_config = ( test_config = mpileaks_env_config(config_var_path)
"""\
env:
include:
- %s
specs:
- mpileaks
"""
% config_var_path
)
_env_create("test", StringIO(test_config)) _env_create("test", StringIO(test_config))
e = ev.read("test") e = ev.read("test")
config_real_path = substitute_path_variables(config_var_path) config_real_path = substitute_path_variables(config_var_path)
fs.mkdirp(os.path.dirname(config_real_path)) fs.mkdirp(os.path.dirname(config_real_path))
with open(config_real_path, "w") as f: fs.rename(packages_file.strpath, config_real_path)
f.write( assert os.path.exists(config_real_path)
"""\
packages:
mpileaks:
version: [2.2]
"""
)
with e: with e:
e.concretize() e.concretize()
@ -3030,3 +3065,24 @@ def test_unify_when_possible_works_around_conflicts():
assert len([x for x in e.all_specs() if x.satisfies("mpileaks+opt")]) == 1 assert len([x for x in e.all_specs() if x.satisfies("mpileaks+opt")]) == 1
assert len([x for x in e.all_specs() if x.satisfies("mpileaks~opt")]) == 1 assert len([x for x in e.all_specs() if x.satisfies("mpileaks~opt")]) == 1
assert len([x for x in e.all_specs() if x.satisfies("mpich")]) == 1 assert len([x for x in e.all_specs() if x.satisfies("mpich")]) == 1
def test_env_include_packages_url(
tmpdir, mutable_empty_config, mock_spider_configs, mock_curl_configs
):
"""Test inclusion of a (GitHub) URL."""
develop_url = "https://github.com/fake/fake/blob/develop/"
default_packages = develop_url + "etc/fake/defaults/packages.yaml"
spack_yaml = tmpdir.join("spack.yaml")
with spack_yaml.open("w") as f:
f.write("spack:\n include:\n - {0}\n".format(default_packages))
assert os.path.isfile(spack_yaml.strpath)
with spack.config.override("config:url_fetch_method", "curl"):
env = ev.Environment(tmpdir.strpath)
ev.activate(env)
scopes = env.included_config_scopes()
assert len(scopes) == 1
cfg = spack.config.get("packages")
assert "openmpi" in cfg["all"]["providers"]["mpi"]

View file

@ -12,8 +12,8 @@
import spack.error import spack.error
import spack.spec import spack.spec
import spack.store import spack.store
from spack.fetch_strategy import FetchError
from spack.main import SpackCommand, SpackCommandError from spack.main import SpackCommand, SpackCommandError
from spack.util.web import FetchError
pytestmark = pytest.mark.usefixtures("config", "mutable_mock_repo") pytestmark = pytest.mark.usefixtures("config", "mutable_mock_repo")

View file

@ -12,7 +12,8 @@
import pytest import pytest
from six import StringIO from six import StringIO
from llnl.util.filesystem import getuid, mkdirp, touch import llnl.util.tty as tty
from llnl.util.filesystem import getuid, join_path, mkdirp, touch, touchp
import spack.config import spack.config
import spack.environment as ev import spack.environment as ev
@ -828,9 +829,9 @@ def test_bad_config_section(mock_low_high_config):
spack.config.get("foobar") spack.config.get("foobar")
@pytest.mark.skipif(sys.platform == "win32", reason="Not supported on Windows (yet)") @pytest.mark.skipif(sys.platform == "win32", reason="chmod not supported on Windows")
@pytest.mark.skipif(getuid() == 0, reason="user is root") @pytest.mark.skipif(getuid() == 0, reason="user is root")
def test_bad_command_line_scopes(tmpdir, mock_low_high_config): def test_bad_command_line_scopes(tmpdir, config):
cfg = spack.config.Configuration() cfg = spack.config.Configuration()
with tmpdir.as_cwd(): with tmpdir.as_cwd():
@ -1265,3 +1266,168 @@ def test_user_cache_path_is_overridable(working_env):
def test_user_cache_path_is_default_when_env_var_is_empty(working_env): def test_user_cache_path_is_default_when_env_var_is_empty(working_env):
os.environ["SPACK_USER_CACHE_PATH"] = "" os.environ["SPACK_USER_CACHE_PATH"] = ""
assert os.path.expanduser("~%s.spack" % os.sep) == spack.paths._get_user_cache_path() assert os.path.expanduser("~%s.spack" % os.sep) == spack.paths._get_user_cache_path()
github_url = "https://github.com/fake/fake/{0}/develop"
gitlab_url = "https://gitlab.fake.io/user/repo/-/blob/config/defaults"
@pytest.mark.parametrize(
"url,isfile",
[
(github_url.format("tree"), False),
("{0}/README.md".format(github_url.format("blob")), True),
("{0}/etc/fake/defaults/packages.yaml".format(github_url.format("blob")), True),
(gitlab_url, False),
(None, False),
],
)
def test_config_collect_urls(mutable_empty_config, mock_spider_configs, url, isfile):
with spack.config.override("config:url_fetch_method", "curl"):
urls = spack.config.collect_urls(url)
if url:
if isfile:
expected = 1 if url.endswith(".yaml") else 0
assert len(urls) == expected
else:
# Expect multiple configuration files for a "directory"
assert len(urls) > 1
else:
assert not urls
@pytest.mark.parametrize(
"url,isfile,fail",
[
(github_url.format("tree"), False, False),
(gitlab_url, False, False),
("{0}/README.md".format(github_url.format("blob")), True, True),
("{0}/compilers.yaml".format(gitlab_url), True, False),
(None, False, True),
],
)
def test_config_fetch_remote_configs(
tmpdir,
mutable_empty_config,
mock_collect_urls,
mock_curl_configs,
url,
isfile,
fail,
):
def _has_content(filename):
# The first element of all configuration files for this test happen to
# be the basename of the file so this check leverages that feature. If
# that changes, then this check will need to change accordingly.
element = "{0}:".format(os.path.splitext(os.path.basename(filename))[0])
with open(filename, "r") as fd:
for line in fd:
if element in line:
return True
tty.debug("Expected {0} in '{1}'".format(element, filename))
return False
dest_dir = join_path(tmpdir.strpath, "defaults")
if fail:
msg = "Cannot retrieve configuration"
with spack.config.override("config:url_fetch_method", "curl"):
with pytest.raises(spack.config.ConfigFileError, match=msg):
spack.config.fetch_remote_configs(url, dest_dir)
else:
with spack.config.override("config:url_fetch_method", "curl"):
path = spack.config.fetch_remote_configs(url, dest_dir)
assert os.path.exists(path)
if isfile:
# Ensure correct file is "fetched"
assert os.path.basename(path) == os.path.basename(url)
# Ensure contents of the file has expected config element
assert _has_content(path)
else:
for filename in os.listdir(path):
assert _has_content(join_path(path, filename))
@pytest.fixture(scope="function")
def mock_collect_urls(mock_config_data, monkeypatch):
"""Mock the collection of URLs to avoid mocking spider."""
_, config_files = mock_config_data
def _collect(base_url):
if not base_url:
return []
ext = os.path.splitext(base_url)[1]
if ext:
return [base_url] if ext == ".yaml" else []
return [join_path(base_url, f) for f in config_files]
monkeypatch.setattr(spack.config, "collect_urls", _collect)
yield
@pytest.mark.parametrize(
"url,skip",
[
(github_url.format("tree"), True),
("{0}/compilers.yaml".format(gitlab_url), True),
],
)
def test_config_fetch_remote_configs_skip(
tmpdir, mutable_empty_config, mock_collect_urls, mock_curl_configs, url, skip
):
"""Ensure skip fetching remote config file if it already exists when
required and not skipping if replacing it."""
def check_contents(filename, expected):
with open(filename, "r") as fd:
lines = fd.readlines()
if expected:
assert lines[0] == "compilers:"
else:
assert not lines
dest_dir = join_path(tmpdir.strpath, "defaults")
filename = "compilers.yaml"
# Create a stage directory with an empty configuration file
path = join_path(dest_dir, filename)
touchp(path)
# Do NOT replace the existing cached configuration file if skipping
expected = None if skip else "compilers:"
with spack.config.override("config:url_fetch_method", "curl"):
path = spack.config.fetch_remote_configs(url, dest_dir, skip)
result_filename = path if path.endswith(".yaml") else join_path(path, filename)
check_contents(result_filename, expected)
def test_config_file_dir_failure(tmpdir, mutable_empty_config):
with pytest.raises(spack.config.ConfigFileError, match="not a file"):
spack.config.read_config_file(tmpdir.strpath)
@pytest.mark.skipif(sys.platform == "win32", reason="chmod not supported on Windows")
def test_config_file_read_perms_failure(tmpdir, mutable_empty_config):
"""Test reading a configuration file without permissions to ensure
ConfigFileError is raised."""
filename = join_path(tmpdir.strpath, "test.yaml")
touch(filename)
os.chmod(filename, 0o200)
with pytest.raises(spack.config.ConfigFileError, match="not readable"):
spack.config.read_config_file(filename)
def test_config_file_read_invalid_yaml(tmpdir, mutable_empty_config):
"""Test reading a configuration file with invalid (unparseable) YAML
raises a ConfigFileError."""
filename = join_path(tmpdir.strpath, "test.yaml")
with open(filename, "w") as f:
f.write("spack:\nview")
with pytest.raises(spack.config.ConfigFileError, match="parsing yaml"):
spack.config.read_config_file(filename)

View file

@ -26,6 +26,7 @@
import archspec.cpu.schema import archspec.cpu.schema
import llnl.util.lang import llnl.util.lang
import llnl.util.tty as tty
from llnl.util.filesystem import mkdirp, remove_linked_tree, working_dir from llnl.util.filesystem import mkdirp, remove_linked_tree, working_dir
import spack.binary_distribution import spack.binary_distribution
@ -47,8 +48,9 @@
import spack.util.executable import spack.util.executable
import spack.util.gpg import spack.util.gpg
import spack.util.spack_yaml as syaml import spack.util.spack_yaml as syaml
from spack.fetch_strategy import FetchError, FetchStrategyComposite, URLFetchStrategy from spack.fetch_strategy import FetchStrategyComposite, URLFetchStrategy
from spack.util.pattern import Bunch from spack.util.pattern import Bunch
from spack.util.web import FetchError
is_windows = sys.platform == "win32" is_windows = sys.platform == "win32"
@ -1684,3 +1686,82 @@ def noncyclical_dir_structure(tmpdir):
with open(j("file_3"), "wb"): with open(j("file_3"), "wb"):
pass pass
yield d yield d
@pytest.fixture(scope="function")
def mock_config_data():
config_data_dir = os.path.join(spack.paths.test_path, "data", "config")
return config_data_dir, os.listdir(config_data_dir)
@pytest.fixture(scope="function")
def mock_curl_configs(mock_config_data, monkeypatch):
"""
Mock curl-based retrieval of configuration files from the web by grabbing
them from the test data configuration directory.
Fetches a single (configuration) file if the name matches one in the test
data directory.
"""
config_data_dir, config_files = mock_config_data
class MockCurl(object):
def __init__(self):
self.returncode = None
def __call__(self, *args, **kwargs):
url = [a for a in args if a.startswith("http")][0]
basename = os.path.basename(url)
if os.path.splitext(url)[1]:
if basename in config_files:
filename = os.path.join(config_data_dir, basename)
with open(filename, "r") as f:
lines = f.readlines()
write_file(os.path.basename(filename), "".join(lines))
self.returncode = 0
else:
# This is a "404" and is technically only returned if -f
# flag is provided to curl.
tty.msg("curl: (22) The requested URL returned error: 404")
self.returncode = 22
def mock_curl(*args):
return MockCurl()
monkeypatch.setattr(spack.util.web, "_curl", mock_curl)
yield
@pytest.fixture(scope="function")
def mock_spider_configs(mock_config_data, monkeypatch):
"""
Mock retrieval of configuration file URLs from the web by grabbing
them from the test data configuration directory.
"""
config_data_dir, config_files = mock_config_data
def _spider(*args, **kwargs):
root_urls = args[0]
if not root_urls:
return [], set()
root_urls = [root_urls] if isinstance(root_urls, str) else root_urls
# Any URL with an extension will be treated like a file; otherwise,
# it is considered a directory/folder and we'll grab all available
# files.
urls = []
for url in root_urls:
if os.path.splitext(url)[1]:
urls.append(url)
else:
urls.extend([os.path.join(url, f) for f in config_files])
return [], set(urls)
monkeypatch.setattr(spack.util.web, "spider", _spider)
yield

View file

@ -10,6 +10,7 @@
import spack.config import spack.config
import spack.fetch_strategy import spack.fetch_strategy
import spack.stage import spack.stage
from spack.util.web import FetchError
@pytest.mark.parametrize("_fetch_method", ["curl", "urllib"]) @pytest.mark.parametrize("_fetch_method", ["curl", "urllib"])
@ -32,7 +33,7 @@ def test_gcsfetchstrategy_bad_url(tmpdir, _fetch_method):
with spack.stage.Stage(fetcher, path=testpath) as stage: with spack.stage.Stage(fetcher, path=testpath) as stage:
assert stage is not None assert stage is not None
assert fetcher.archive_file is None assert fetcher.archive_file is None
with pytest.raises(spack.fetch_strategy.FetchError): with pytest.raises(FetchError):
fetcher.fetch() fetcher.fetch()

View file

@ -573,7 +573,7 @@ def _instr(pkg):
monkeypatch.setattr(spack.package_base.PackageBase, "download_instr", _instr) monkeypatch.setattr(spack.package_base.PackageBase, "download_instr", _instr)
expected = pkg.download_instr if manual else "All fetchers failed" expected = pkg.download_instr if manual else "All fetchers failed"
with pytest.raises(spack.fetch_strategy.FetchError, match=expected): with pytest.raises(spack.util.web.FetchError, match=expected):
pkg.do_fetch() pkg.do_fetch()

View file

@ -10,6 +10,7 @@
import spack.config as spack_config import spack.config as spack_config
import spack.fetch_strategy as spack_fs import spack.fetch_strategy as spack_fs
import spack.stage as spack_stage import spack.stage as spack_stage
from spack.util.web import FetchError
@pytest.mark.parametrize("_fetch_method", ["curl", "urllib"]) @pytest.mark.parametrize("_fetch_method", ["curl", "urllib"])
@ -32,7 +33,7 @@ def test_s3fetchstrategy_bad_url(tmpdir, _fetch_method):
with spack_stage.Stage(fetcher, path=testpath) as stage: with spack_stage.Stage(fetcher, path=testpath) as stage:
assert stage is not None assert stage is not None
assert fetcher.archive_file is None assert fetcher.archive_file is None
with pytest.raises(spack_fs.FetchError): with pytest.raises(FetchError):
fetcher.fetch() fetcher.fetch()

View file

@ -22,6 +22,7 @@
from spack.resource import Resource from spack.resource import Resource
from spack.stage import DIYStage, ResourceStage, Stage, StageComposite from spack.stage import DIYStage, ResourceStage, Stage, StageComposite
from spack.util.path import canonicalize_path from spack.util.path import canonicalize_path
from spack.util.web import FetchError
# The following values are used for common fetch and stage mocking fixtures: # The following values are used for common fetch and stage mocking fixtures:
_archive_base = "test-files" _archive_base = "test-files"
@ -525,7 +526,7 @@ def test_no_search_mirror_only(self, failing_fetch_strategy, failing_search_fn):
with stage: with stage:
try: try:
stage.fetch(mirror_only=True) stage.fetch(mirror_only=True)
except spack.fetch_strategy.FetchError: except FetchError:
pass pass
check_destroy(stage, self.stage_name) check_destroy(stage, self.stage_name)
@ -540,7 +541,7 @@ def test_search_if_default_fails(self, failing_fetch_strategy, search_fn, err_ms
stage = Stage(failing_fetch_strategy, name=self.stage_name, search_fn=search_fn) stage = Stage(failing_fetch_strategy, name=self.stage_name, search_fn=search_fn)
with stage: with stage:
with pytest.raises(spack.fetch_strategy.FetchError, match=expected): with pytest.raises(FetchError, match=expected):
stage.fetch(mirror_only=False, err_msg=err_msg) stage.fetch(mirror_only=False, err_msg=err_msg)
check_destroy(stage, self.stage_name) check_destroy(stage, self.stage_name)

View file

@ -17,6 +17,7 @@
import spack.repo import spack.repo
import spack.util.crypto as crypto import spack.util.crypto as crypto
import spack.util.executable import spack.util.executable
import spack.util.web as web_util
from spack.spec import Spec from spack.spec import Spec
from spack.stage import Stage from spack.stage import Stage
from spack.util.executable import which from spack.util.executable import which
@ -328,9 +329,8 @@ def _which(*args, **kwargs):
err_msg = err_fmt.format(args[0]) err_msg = err_fmt.format(args[0])
raise spack.util.executable.CommandNotFoundError(err_msg) raise spack.util.executable.CommandNotFoundError(err_msg)
# Patching the 'which' symbol imported by fetch_strategy works # Patching the 'which' symbol imported by fetch_strategy needed due
# since it is too late in import processing to patch the defining # to 'from spack.util.executable import which' in this module.
# (spack.util.executable) module's symbol.
monkeypatch.setattr(fs, "which", _which) monkeypatch.setattr(fs, "which", _which)
testpath = str(tmpdir) testpath = str(tmpdir)
@ -342,3 +342,79 @@ def _which(*args, **kwargs):
with Stage(fetcher, path=testpath) as stage: with Stage(fetcher, path=testpath) as stage:
out = stage.fetch() out = stage.fetch()
assert err_fmt.format("curl") in out assert err_fmt.format("curl") in out
def test_url_fetch_text_without_url(tmpdir):
with pytest.raises(web_util.FetchError, match="URL is required"):
web_util.fetch_url_text(None)
def test_url_fetch_text_curl_failures(tmpdir, monkeypatch):
"""Check fetch_url_text if URL's curl is missing."""
err_fmt = "No such command {0}"
def _which(*args, **kwargs):
err_msg = err_fmt.format(args[0])
raise spack.util.executable.CommandNotFoundError(err_msg)
# Patching the 'which' symbol imported by spack.util.web needed due
# to 'from spack.util.executable import which' in this module.
monkeypatch.setattr(spack.util.web, "which", _which)
with spack.config.override("config:url_fetch_method", "curl"):
with pytest.raises(web_util.FetchError, match="Missing required curl"):
web_util.fetch_url_text("https://github.com/")
def test_url_check_curl_errors():
"""Check that standard curl error returncodes raise expected errors."""
# Check returncode 22 (i.e., 404)
with pytest.raises(web_util.FetchError, match="not found"):
web_util.check_curl_code(22)
# Check returncode 60 (certificate error)
with pytest.raises(web_util.FetchError, match="invalid certificate"):
web_util.check_curl_code(60)
def test_url_missing_curl(tmpdir, monkeypatch):
"""Check url_exists failures if URL's curl is missing."""
err_fmt = "No such command {0}"
def _which(*args, **kwargs):
err_msg = err_fmt.format(args[0])
raise spack.util.executable.CommandNotFoundError(err_msg)
# Patching the 'which' symbol imported by spack.util.web needed due
# to 'from spack.util.executable import which' in this module.
monkeypatch.setattr(spack.util.web, "which", _which)
with spack.config.override("config:url_fetch_method", "curl"):
with pytest.raises(web_util.FetchError, match="Missing required curl"):
web_util.url_exists("https://github.com/")
def test_url_fetch_text_urllib_bad_returncode(tmpdir, monkeypatch):
class response(object):
def getcode(self):
return 404
def _read_from_url(*args, **kwargs):
return None, None, response()
monkeypatch.setattr(spack.util.web, "read_from_url", _read_from_url)
with spack.config.override("config:url_fetch_method", "urllib"):
with pytest.raises(web_util.FetchError, match="failed with error code"):
web_util.fetch_url_text("https://github.com/")
def test_url_fetch_text_urllib_web_error(tmpdir, monkeypatch):
def _raise_web_error(*args, **kwargs):
raise web_util.SpackWebError("bad url")
monkeypatch.setattr(spack.util.web, "read_from_url", _raise_web_error)
with spack.config.override("config:url_fetch_method", "urllib"):
with pytest.raises(web_util.FetchError, match="fetch failed to verify"):
web_util.fetch_url_text("https://github.com/")

View file

@ -306,7 +306,14 @@ def add_padding(path, length):
def canonicalize_path(path): def canonicalize_path(path):
"""Same as substitute_path_variables, but also take absolute path.""" """Same as substitute_path_variables, but also take absolute path.
Arguments:
path (str): path being converted as needed
Returns:
(str): An absolute path with path variable substitution
"""
# Get file in which path was written in case we need to make it absolute # Get file in which path was written in case we need to make it absolute
# relative to that path. # relative to that path.
filename = None filename = None

View file

@ -12,7 +12,7 @@
import re import re
import sys import sys
import six.moves.urllib.parse as urllib_parse import six.moves.urllib.parse
from six import string_types from six import string_types
from spack.util.path import ( from spack.util.path import (
@ -67,11 +67,18 @@ def local_file_path(url):
def parse(url, scheme="file"): def parse(url, scheme="file"):
"""Parse a url. """Parse a url.
For file:// URLs, the netloc and path components are concatenated and Path variable substitution is performed on file URLs as needed. The
passed through spack.util.path.canoncalize_path(). variables are documented at
https://spack.readthedocs.io/en/latest/configuration.html#spack-specific-variables.
Otherwise, the returned value is the same as urllib's urlparse() with Arguments:
allow_fragments=False. url (str): URL to be parsed
scheme (str): associated URL scheme
Returns:
(six.moves.urllib.parse.ParseResult): For file scheme URLs, the
netloc and path components are concatenated and passed through
spack.util.path.canoncalize_path(). Otherwise, the returned value
is the same as urllib's urlparse() with allow_fragments=False.
""" """
# guarantee a value passed in is of proper url format. Guarantee # guarantee a value passed in is of proper url format. Guarantee
# allows for easier string manipulation accross platforms # allows for easier string manipulation accross platforms
@ -79,7 +86,11 @@ def parse(url, scheme="file"):
require_url_format(url) require_url_format(url)
url = escape_file_url(url) url = escape_file_url(url)
url_obj = ( url_obj = (
urllib_parse.urlparse(url, scheme=scheme, allow_fragments=False) six.moves.urllib.parse.urlparse(
url,
scheme=scheme,
allow_fragments=False,
)
if isinstance(url, string_types) if isinstance(url, string_types)
else url else url
) )
@ -108,8 +119,13 @@ def parse(url, scheme="file"):
if sys.platform == "win32": if sys.platform == "win32":
path = convert_to_posix_path(path) path = convert_to_posix_path(path)
return urllib_parse.ParseResult( return six.moves.urllib.parse.ParseResult(
scheme=scheme, netloc=netloc, path=path, params=params, query=query, fragment=None scheme=scheme,
netloc=netloc,
path=path,
params=params,
query=query,
fragment=None,
) )
@ -188,7 +204,11 @@ def join(base_url, path, *extra, **kwargs):
last_abs_component = None last_abs_component = None
scheme = "" scheme = ""
for i in range(n - 1, -1, -1): for i in range(n - 1, -1, -1):
obj = urllib_parse.urlparse(paths[i], scheme="", allow_fragments=False) obj = six.moves.urllib.parse.urlparse(
paths[i],
scheme="",
allow_fragments=False,
)
scheme = obj.scheme scheme = obj.scheme
@ -198,7 +218,11 @@ def join(base_url, path, *extra, **kwargs):
# Without a scheme, we have to go back looking for the # Without a scheme, we have to go back looking for the
# next-last component that specifies a scheme. # next-last component that specifies a scheme.
for j in range(i - 1, -1, -1): for j in range(i - 1, -1, -1):
obj = urllib_parse.urlparse(paths[j], scheme="", allow_fragments=False) obj = six.moves.urllib.parse.urlparse(
paths[j],
scheme="",
allow_fragments=False,
)
if obj.scheme: if obj.scheme:
paths[i] = "{SM}://{NL}{PATH}".format( paths[i] = "{SM}://{NL}{PATH}".format(
@ -214,13 +238,17 @@ def join(base_url, path, *extra, **kwargs):
if last_abs_component is not None: if last_abs_component is not None:
paths = paths[last_abs_component:] paths = paths[last_abs_component:]
if len(paths) == 1: if len(paths) == 1:
result = urllib_parse.urlparse(paths[0], scheme="file", allow_fragments=False) result = six.moves.urllib.parse.urlparse(
paths[0],
scheme="file",
allow_fragments=False,
)
# another subtlety: If the last argument to join() is an absolute # another subtlety: If the last argument to join() is an absolute
# file:// URL component with a relative path, the relative path # file:// URL component with a relative path, the relative path
# needs to be resolved. # needs to be resolved.
if result.scheme == "file" and result.netloc: if result.scheme == "file" and result.netloc:
result = urllib_parse.ParseResult( result = six.moves.urllib.parse.ParseResult(
scheme=result.scheme, scheme=result.scheme,
netloc="", netloc="",
path=posixpath.abspath(result.netloc + result.path), path=posixpath.abspath(result.netloc + result.path),
@ -278,8 +306,13 @@ def _join(base_url, path, *extra, **kwargs):
base_path = convert_to_posix_path(base_path) base_path = convert_to_posix_path(base_path)
return format( return format(
urllib_parse.ParseResult( six.moves.urllib.parse.ParseResult(
scheme=scheme, netloc=netloc, path=base_path, params=params, query=query, fragment=None scheme=scheme,
netloc=netloc,
path=base_path,
params=params,
query=query,
fragment=None,
) )
) )
@ -337,9 +370,12 @@ def parse_git_url(url):
return (scheme, user, hostname, port, path) return (scheme, user, hostname, port, path)
def is_url_format(url):
return re.search(r"^(file://|http://|https://|ftp://|s3://|gs://|ssh://|git://|/)", url)
def require_url_format(url): def require_url_format(url):
ut = re.search(r"^(file://|http://|https://|ftp://|s3://|gs://|ssh://|git://|/)", url) if not is_url_format(url):
if not ut:
raise ValueError("Invalid url format from url: %s" % url) raise ValueError("Invalid url format from url: %s" % url)

View file

@ -22,7 +22,7 @@
import llnl.util.lang import llnl.util.lang
import llnl.util.tty as tty import llnl.util.tty as tty
from llnl.util.filesystem import mkdirp, rename from llnl.util.filesystem import mkdirp, rename, working_dir
import spack import spack
import spack.config import spack.config
@ -33,6 +33,7 @@
import spack.util.s3 as s3_util import spack.util.s3 as s3_util
import spack.util.url as url_util import spack.util.url as url_util
from spack.util.compression import ALLOWED_ARCHIVE_TYPES from spack.util.compression import ALLOWED_ARCHIVE_TYPES
from spack.util.executable import CommandNotFoundError, which
from spack.util.path import convert_to_posix_path from spack.util.path import convert_to_posix_path
#: User-Agent used in Request objects #: User-Agent used in Request objects
@ -224,30 +225,222 @@ def push_to_url(local_file_path, remote_path, keep_original=True, extra_args=Non
) )
def url_exists(url): def base_curl_fetch_args(url, timeout=0):
url = url_util.parse(url) """Return the basic fetch arguments typically used in calls to curl.
local_path = url_util.local_file_path(url)
The arguments include those for ensuring behaviors such as failing on
errors for codes over 400, printing HTML headers, resolving 3xx redirects,
status or failure handling, and connection timeouts.
It also uses the following configuration option to set an additional
argument as needed:
* config:connect_timeout (int): connection timeout
* config:verify_ssl (str): Perform SSL verification
Arguments:
url (str): URL whose contents will be fetched
timeout (int): Connection timeout, which is only used if higher than
config:connect_timeout
Returns (list): list of argument strings
"""
curl_args = [
"-f", # fail on >400 errors
"-D",
"-", # "-D -" prints out HTML headers
"-L", # resolve 3xx redirects
url,
]
if not spack.config.get("config:verify_ssl"):
curl_args.append("-k")
if sys.stdout.isatty() and tty.msg_enabled():
curl_args.append("-#") # status bar when using a tty
else:
curl_args.append("-sS") # show errors if fail
connect_timeout = spack.config.get("config:connect_timeout", 10)
if timeout:
connect_timeout = max(int(connect_timeout), int(timeout))
if connect_timeout > 0:
curl_args.extend(["--connect-timeout", str(connect_timeout)])
return curl_args
def check_curl_code(returncode):
"""Check standard return code failures for provided arguments.
Arguments:
returncode (int): curl return code
Raises FetchError if the curl returncode indicates failure
"""
if returncode != 0:
if returncode == 22:
# This is a 404. Curl will print the error.
raise FetchError("URL was not found!")
if returncode == 60:
# This is a certificate error. Suggest spack -k
raise FetchError(
"Curl was unable to fetch due to invalid certificate. "
"This is either an attack, or your cluster's SSL "
"configuration is bad. If you believe your SSL "
"configuration is bad, you can try running spack -k, "
"which will not check SSL certificates."
"Use this at your own risk."
)
raise FetchError("Curl failed with error {0}".format(returncode))
def _curl(curl=None):
if not curl:
try:
curl = which("curl", required=True)
except CommandNotFoundError as exc:
tty.error(str(exc))
raise FetchError("Missing required curl fetch method")
return curl
def fetch_url_text(url, curl=None, dest_dir="."):
"""Retrieves text-only URL content using the configured fetch method.
It determines the fetch method from:
* config:url_fetch_method (str): fetch method to use (e.g., 'curl')
If the method is `curl`, it also uses the following configuration
options:
* config:connect_timeout (int): connection time out
* config:verify_ssl (str): Perform SSL verification
Arguments:
url (str): URL whose contents are to be fetched
curl (spack.util.executable.Executable or None): (optional) curl
executable if curl is the configured fetch method
dest_dir (str): (optional) destination directory for fetched text
file
Returns (str or None): path to the fetched file
Raises FetchError if the curl returncode indicates failure
"""
if not url:
raise FetchError("A URL is required to fetch its text")
tty.debug("Fetching text at {0}".format(url))
filename = os.path.basename(url)
path = os.path.join(dest_dir, filename)
fetch_method = spack.config.get("config:url_fetch_method")
tty.debug("Using '{0}' to fetch {1} into {2}".format(fetch_method, url, path))
if fetch_method == "curl":
curl_exe = _curl(curl)
if not curl_exe:
raise FetchError("Missing required fetch method (curl)")
curl_args = ["-O"]
curl_args.extend(base_curl_fetch_args(url))
# Curl automatically downloads file contents as filename
with working_dir(dest_dir, create=True):
_ = curl_exe(*curl_args, fail_on_error=False, output=os.devnull)
check_curl_code(curl_exe.returncode)
return path
else:
try:
_, _, response = read_from_url(url)
returncode = response.getcode()
if returncode and returncode != 200:
raise FetchError("Urllib failed with error code {0}".format(returncode))
output = codecs.getreader("utf-8")(response).read()
if output:
with working_dir(dest_dir, create=True):
with open(filename, "w") as f:
f.write(output)
return path
except SpackWebError as err:
raise FetchError("Urllib fetch failed to verify url: {0}".format(str(err)))
return None
def url_exists(url, curl=None):
"""Determines whether url exists.
A scheme-specific process is used for Google Storage (`gs`) and Amazon
Simple Storage Service (`s3`) URLs; otherwise, the configured fetch
method defined by `config:url_fetch_method` is used.
If the method is `curl`, it also uses the following configuration option:
* config:verify_ssl (str): Perform SSL verification
Otherwise, `urllib` will be used.
Arguments:
url (str): URL whose existence is being checked
curl (spack.util.executable.Executable or None): (optional) curl
executable if curl is the configured fetch method
Returns (bool): True if it exists; False otherwise.
"""
tty.debug("Checking existence of {0}".format(url))
url_result = url_util.parse(url)
# Check if a local file
local_path = url_util.local_file_path(url_result)
if local_path: if local_path:
return os.path.exists(local_path) return os.path.exists(local_path)
if url.scheme == "s3": # Check if Amazon Simple Storage Service (S3) .. urllib-based fetch
# Check for URL specific connection information if url_result.scheme == "s3":
s3 = s3_util.create_s3_session(url, connection=s3_util.get_mirror_connection(url)) # Check for URL-specific connection information
s3 = s3_util.create_s3_session(
url_result, connection=s3_util.get_mirror_connection(url_result)
) # noqa: E501
try: try:
s3.get_object(Bucket=url.netloc, Key=url.path.lstrip("/")) s3.get_object(Bucket=url_result.netloc, Key=url_result.path.lstrip("/"))
return True return True
except s3.ClientError as err: except s3.ClientError as err:
if err.response["Error"]["Code"] == "NoSuchKey": if err.response["Error"]["Code"] == "NoSuchKey":
return False return False
raise err raise err
elif url.scheme == "gs": # Check if Google Storage .. urllib-based fetch
gcs = gcs_util.GCSBlob(url) if url_result.scheme == "gs":
gcs = gcs_util.GCSBlob(url_result)
return gcs.exists() return gcs.exists()
# otherwise, just try to "read" from the URL, and assume that *any* # Otherwise, use the configured fetch method
# non-throwing response contains the resource represented by the URL if spack.config.get("config:url_fetch_method") == "curl":
curl_exe = _curl(curl)
if not curl_exe:
return False
# Telling curl to fetch the first byte (-r 0-0) is supposed to be
# portable.
curl_args = ["--stderr", "-", "-s", "-f", "-r", "0-0", url]
if not spack.config.get("config:verify_ssl"):
curl_args.append("-k")
_ = curl_exe(*curl_args, fail_on_error=False, output=os.devnull)
return curl_exe.returncode == 0
# If we get here, then the only other fetch method option is urllib.
# So try to "read" from the URL and assume that *any* non-throwing
# response contains the resource represented by the URL.
try: try:
read_from_url(url) read_from_url(url)
return True return True
@ -716,6 +909,10 @@ def unfuzz(header):
raise raise
class FetchError(spack.error.SpackError):
"""Superclass for fetch-related errors."""
class SpackWebError(spack.error.SpackError): class SpackWebError(spack.error.SpackError):
"""Superclass for Spack web spidering errors.""" """Superclass for Spack web spidering errors."""