Add config option to use urllib to fetch if curl missing (#21398)

* Use Python module urllib to fetch in the case that curl is missing
This commit is contained in:
loulawrence 2021-06-22 16:38:37 -04:00 committed by GitHub
parent 477c8ce820
commit 4da0561496
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 226 additions and 116 deletions

View file

@ -316,10 +316,10 @@ def fetch(self):
try: try:
partial_file, save_file = self._fetch_from_url(url) partial_file, save_file = self._fetch_from_url(url)
if save_file: if save_file and (partial_file is not None):
os.rename(partial_file, save_file) os.rename(partial_file, save_file)
break break
except FetchError as e: except FailedDownloadError as e:
errors.append(str(e)) errors.append(str(e))
for msg in errors: for msg in errors:
@ -330,16 +330,68 @@ def fetch(self):
def _existing_url(self, url): def _existing_url(self, url):
tty.debug('Checking existence of {0}'.format(url)) tty.debug('Checking existence of {0}'.format(url))
curl = self.curl
# Telling curl to fetch the first byte (-r 0-0) is supposed to be if spack.config.get('config:use_curl'):
# portable. curl = self.curl
curl_args = ['--stderr', '-', '-s', '-f', '-r', '0-0', url] # Telling curl to fetch the first byte (-r 0-0) is supposed to be
if not spack.config.get('config:verify_ssl'): # portable.
curl_args.append('-k') curl_args = ['--stderr', '-', '-s', '-f', '-r', '0-0', url]
_ = curl(*curl_args, fail_on_error=False, output=os.devnull) if not spack.config.get('config:verify_ssl'):
return curl.returncode == 0 curl_args.append('-k')
_ = curl(*curl_args, fail_on_error=False, output=os.devnull)
return curl.returncode == 0
else:
# Telling urllib to check if url is accessible
try:
url, headers, response = web_util.read_from_url(url)
except web_util.SpackWebError:
msg = "Urllib fetch failed to verify url {0}".format(url)
raise FailedDownloadError(url, msg)
return (response.getcode() is None or response.getcode() == 200)
def _fetch_from_url(self, url): def _fetch_from_url(self, url):
if spack.config.get('config:use_curl'):
return self._fetch_curl(url)
else:
return self._fetch_urllib(url)
def _check_headers(self, headers):
# Check if we somehow got an HTML file rather than the archive we
# asked for. We only look at the last content type, to handle
# redirects properly.
content_types = re.findall(r'Content-Type:[^\r\n]+', headers,
flags=re.IGNORECASE)
if content_types and 'text/html' in content_types[-1]:
warn_content_type_mismatch(self.archive_file or "the archive")
@_needs_stage
def _fetch_urllib(self, url):
save_file = None
if self.stage.save_filename:
save_file = self.stage.save_filename
tty.msg('Fetching {0}'.format(url))
# Run urllib but grab the mime type from the http headers
try:
url, headers, response = web_util.read_from_url(url)
except web_util.SpackWebError as e:
# clean up archive on failure.
if self.archive_file:
os.remove(self.archive_file)
if save_file and os.path.exists(save_file):
os.remove(save_file)
msg = 'urllib failed to fetch with error {0}'.format(e)
raise FailedDownloadError(url, msg)
_data = response.read()
with open(save_file, 'wb') as _open_file:
_open_file.write(_data)
headers = _data.decode('utf-8', 'ignore')
self._check_headers(headers)
return None, save_file
@_needs_stage
def _fetch_curl(self, url):
save_file = None save_file = None
partial_file = None partial_file = None
if self.stage.save_filename: if self.stage.save_filename:
@ -423,13 +475,7 @@ def _fetch_from_url(self, url):
url, url,
"Curl failed with error %d" % curl.returncode) "Curl failed with error %d" % curl.returncode)
# Check if we somehow got an HTML file rather than the archive we self._check_headers(headers)
# asked for. We only look at the last content type, to handle
# redirects properly.
content_types = re.findall(r'Content-Type:[^\r\n]+', headers,
flags=re.IGNORECASE)
if content_types and 'text/html' in content_types[-1]:
warn_content_type_mismatch(self.archive_file or "the archive")
return partial_file, save_file return partial_file, save_file
@property # type: ignore # decorated properties unsupported in mypy @property # type: ignore # decorated properties unsupported in mypy

View file

@ -8,29 +8,33 @@
from llnl.util.filesystem import mkdirp, touch from llnl.util.filesystem import mkdirp, touch
import spack.config
from spack.stage import Stage from spack.stage import Stage
from spack.fetch_strategy import CacheURLFetchStrategy, NoCacheError from spack.fetch_strategy import CacheURLFetchStrategy, NoCacheError
def test_fetch_missing_cache(tmpdir): @pytest.mark.parametrize('use_curl', [True, False])
def test_fetch_missing_cache(tmpdir, use_curl):
"""Ensure raise a missing cache file.""" """Ensure raise a missing cache file."""
testpath = str(tmpdir) testpath = str(tmpdir)
with spack.config.override('config:use_curl', use_curl):
fetcher = CacheURLFetchStrategy(url='file:///not-a-real-cache-file') fetcher = CacheURLFetchStrategy(url='file:///not-a-real-cache-file')
with Stage(fetcher, path=testpath): with Stage(fetcher, path=testpath):
with pytest.raises(NoCacheError, match=r'No cache'): with pytest.raises(NoCacheError, match=r'No cache'):
fetcher.fetch() fetcher.fetch()
def test_fetch(tmpdir): @pytest.mark.parametrize('use_curl', [True, False])
def test_fetch(tmpdir, use_curl):
"""Ensure a fetch after expanding is effectively a no-op.""" """Ensure a fetch after expanding is effectively a no-op."""
testpath = str(tmpdir) testpath = str(tmpdir)
cache = os.path.join(testpath, 'cache.tar.gz') cache = os.path.join(testpath, 'cache.tar.gz')
touch(cache) touch(cache)
url = 'file:///{0}'.format(cache) url = 'file:///{0}'.format(cache)
with spack.config.override('config:use_curl', use_curl):
fetcher = CacheURLFetchStrategy(url=url) fetcher = CacheURLFetchStrategy(url=url)
with Stage(fetcher, path=testpath) as stage: with Stage(fetcher, path=testpath) as stage:
source_path = stage.source_path source_path = stage.source_path
mkdirp(source_path) mkdirp(source_path)
fetcher.fetch() fetcher.fetch()

View file

@ -6,41 +6,48 @@
import os import os
import pytest import pytest
import spack.config as spack_config
import spack.fetch_strategy as spack_fs import spack.fetch_strategy as spack_fs
import spack.stage as spack_stage import spack.stage as spack_stage
def test_s3fetchstrategy_sans_url(): @pytest.mark.parametrize('use_curl', [True, False])
def test_s3fetchstrategy_sans_url(use_curl):
"""Ensure constructor with no URL fails.""" """Ensure constructor with no URL fails."""
with pytest.raises(ValueError): with spack_config.override('config:use_curl', use_curl):
spack_fs.S3FetchStrategy(None) with pytest.raises(ValueError):
spack_fs.S3FetchStrategy(None)
def test_s3fetchstrategy_bad_url(tmpdir): @pytest.mark.parametrize('use_curl', [True, False])
def test_s3fetchstrategy_bad_url(tmpdir, use_curl):
"""Ensure fetch with bad URL fails as expected.""" """Ensure fetch with bad URL fails as expected."""
testpath = str(tmpdir) testpath = str(tmpdir)
fetcher = spack_fs.S3FetchStrategy(url='file:///does-not-exist') with spack_config.override('config:use_curl', use_curl):
assert fetcher is not None fetcher = spack_fs.S3FetchStrategy(url='file:///does-not-exist')
assert fetcher is not None
with spack_stage.Stage(fetcher, path=testpath) as stage: with spack_stage.Stage(fetcher, path=testpath) as stage:
assert stage is not None assert stage is not None
assert fetcher.archive_file is None assert fetcher.archive_file is None
with pytest.raises(spack_fs.FetchError): with pytest.raises(spack_fs.FetchError):
fetcher.fetch() fetcher.fetch()
def test_s3fetchstrategy_downloaded(tmpdir): @pytest.mark.parametrize('use_curl', [True, False])
def test_s3fetchstrategy_downloaded(tmpdir, use_curl):
"""Ensure fetch with archive file already downloaded is a noop.""" """Ensure fetch with archive file already downloaded is a noop."""
testpath = str(tmpdir) testpath = str(tmpdir)
archive = os.path.join(testpath, 's3.tar.gz') archive = os.path.join(testpath, 's3.tar.gz')
class Archived_S3FS(spack_fs.S3FetchStrategy): with spack_config.override('config:use_curl', use_curl):
@property class Archived_S3FS(spack_fs.S3FetchStrategy):
def archive_file(self): @property
return archive def archive_file(self):
return archive
url = 's3:///{0}'.format(archive) url = 's3:///{0}'.format(archive)
fetcher = Archived_S3FS(url=url) fetcher = Archived_S3FS(url=url)
with spack_stage.Stage(fetcher, path=testpath): with spack_stage.Stage(fetcher, path=testpath):
fetcher.fetch() fetcher.fetch()

View file

@ -19,6 +19,7 @@
from spack.version import ver from spack.version import ver
import spack.util.crypto as crypto import spack.util.crypto as crypto
import spack.util.executable import spack.util.executable
from spack.util.executable import which
@pytest.fixture(params=list(crypto.hashes.keys())) @pytest.fixture(params=list(crypto.hashes.keys()))
@ -47,19 +48,36 @@ def fn(v):
return factory return factory
def test_urlfetchstrategy_sans_url(): @pytest.mark.parametrize('use_curl', [True, False])
def test_urlfetchstrategy_sans_url(use_curl):
"""Ensure constructor with no URL fails.""" """Ensure constructor with no URL fails."""
with pytest.raises(ValueError): with spack.config.override('config:use_curl', use_curl):
with fs.URLFetchStrategy(None): with pytest.raises(ValueError):
pass with fs.URLFetchStrategy(None):
pass
def test_urlfetchstrategy_bad_url(tmpdir): @pytest.mark.parametrize('use_curl', [True, False])
def test_urlfetchstrategy_bad_url(tmpdir, use_curl):
"""Ensure fetch with bad URL fails as expected.""" """Ensure fetch with bad URL fails as expected."""
testpath = str(tmpdir) testpath = str(tmpdir)
with spack.config.override('config:use_curl', use_curl):
with pytest.raises(fs.FailedDownloadError):
fetcher = fs.URLFetchStrategy(url='file:///does-not-exist')
assert fetcher is not None
with pytest.raises(fs.FailedDownloadError): with Stage(fetcher, path=testpath) as stage:
fetcher = fs.URLFetchStrategy(url='file:///does-not-exist') assert stage is not None
assert fetcher.archive_file is None
fetcher.fetch()
def test_fetch_options(tmpdir, mock_archive):
testpath = str(tmpdir)
with spack.config.override('config:use_curl', True):
fetcher = fs.URLFetchStrategy(url=mock_archive.url,
fetch_options={'cookie': 'True',
'timeout': 10})
assert fetcher is not None assert fetcher is not None
with Stage(fetcher, path=testpath) as stage: with Stage(fetcher, path=testpath) as stage:
@ -68,7 +86,32 @@ def test_urlfetchstrategy_bad_url(tmpdir):
fetcher.fetch() fetcher.fetch()
@pytest.mark.parametrize('use_curl', [True, False])
def test_archive_file_errors(tmpdir, mock_archive, use_curl):
"""Ensure FetchStrategy commands may only be used as intended"""
testpath = str(tmpdir)
with spack.config.override('config:use_curl', use_curl):
fetcher = fs.URLFetchStrategy(url=mock_archive.url)
assert fetcher is not None
with pytest.raises(fs.FailedDownloadError):
with Stage(fetcher, path=testpath) as stage:
assert stage is not None
assert fetcher.archive_file is None
with pytest.raises(fs.NoArchiveFileError):
fetcher.archive(testpath)
with pytest.raises(fs.NoArchiveFileError):
fetcher.expand()
with pytest.raises(fs.NoArchiveFileError):
fetcher.reset()
stage.fetch()
with pytest.raises(fs.NoDigestError):
fetcher.check()
assert fetcher.archive_file is not None
fetcher._fetch_from_url('file:///does-not-exist')
@pytest.mark.parametrize('secure', [True, False]) @pytest.mark.parametrize('secure', [True, False])
@pytest.mark.parametrize('use_curl', [True, False])
@pytest.mark.parametrize('mock_archive', @pytest.mark.parametrize('mock_archive',
[('.tar.gz', 'z'), ('.tgz', 'z'), [('.tar.gz', 'z'), ('.tgz', 'z'),
('.tar.bz2', 'j'), ('.tbz2', 'j'), ('.tar.bz2', 'j'), ('.tbz2', 'j'),
@ -77,6 +120,7 @@ def test_urlfetchstrategy_bad_url(tmpdir):
def test_fetch( def test_fetch(
mock_archive, mock_archive,
secure, secure,
use_curl,
checksum_type, checksum_type,
config, config,
mutable_mock_repo mutable_mock_repo
@ -102,8 +146,8 @@ def test_fetch(
# Enter the stage directory and check some properties # Enter the stage directory and check some properties
with pkg.stage: with pkg.stage:
with spack.config.override('config:verify_ssl', secure): with spack.config.override('config:verify_ssl', secure):
pkg.do_stage() with spack.config.override('config:use_curl', use_curl):
pkg.do_stage()
with working_dir(pkg.stage.source_path): with working_dir(pkg.stage.source_path):
assert os.path.exists('configure') assert os.path.exists('configure')
assert is_exe('configure') assert is_exe('configure')
@ -123,37 +167,41 @@ def test_fetch(
('url-list-test @3.0a1', 'foo-3.0a1.tar.gz', 'abc30a1'), ('url-list-test @3.0a1', 'foo-3.0a1.tar.gz', 'abc30a1'),
('url-list-test @4.5-rc5', 'foo-4.5-rc5.tar.gz', 'abc45rc5'), ('url-list-test @4.5-rc5', 'foo-4.5-rc5.tar.gz', 'abc45rc5'),
]) ])
def test_from_list_url(mock_packages, config, spec, url, digest): @pytest.mark.parametrize('use_curl', [True, False])
def test_from_list_url(mock_packages, config, spec, url, digest, use_curl):
""" """
Test URLs in the url-list-test package, which means they should Test URLs in the url-list-test package, which means they should
have checksums in the package. have checksums in the package.
""" """
specification = Spec(spec).concretized() with spack.config.override('config:use_curl', use_curl):
pkg = spack.repo.get(specification) specification = Spec(spec).concretized()
fetch_strategy = fs.from_list_url(pkg) pkg = spack.repo.get(specification)
assert isinstance(fetch_strategy, fs.URLFetchStrategy) fetch_strategy = fs.from_list_url(pkg)
assert os.path.basename(fetch_strategy.url) == url assert isinstance(fetch_strategy, fs.URLFetchStrategy)
assert fetch_strategy.digest == digest assert os.path.basename(fetch_strategy.url) == url
assert fetch_strategy.extra_options == {} assert fetch_strategy.digest == digest
pkg.fetch_options = {'timeout': 60} assert fetch_strategy.extra_options == {}
fetch_strategy = fs.from_list_url(pkg) pkg.fetch_options = {'timeout': 60}
assert fetch_strategy.extra_options == {'timeout': 60} fetch_strategy = fs.from_list_url(pkg)
assert fetch_strategy.extra_options == {'timeout': 60}
def test_from_list_url_unspecified(mock_packages, config): @pytest.mark.parametrize('use_curl', [True, False])
def test_from_list_url_unspecified(mock_packages, config, use_curl):
"""Test non-specific URLs from the url-list-test package.""" """Test non-specific URLs from the url-list-test package."""
pkg = spack.repo.get('url-list-test') with spack.config.override('config:use_curl', use_curl):
pkg = spack.repo.get('url-list-test')
spec = Spec('url-list-test @2.0.0').concretized() spec = Spec('url-list-test @2.0.0').concretized()
pkg = spack.repo.get(spec) pkg = spack.repo.get(spec)
fetch_strategy = fs.from_list_url(pkg) fetch_strategy = fs.from_list_url(pkg)
assert isinstance(fetch_strategy, fs.URLFetchStrategy) assert isinstance(fetch_strategy, fs.URLFetchStrategy)
assert os.path.basename(fetch_strategy.url) == 'foo-2.0.0.tar.gz' assert os.path.basename(fetch_strategy.url) == 'foo-2.0.0.tar.gz'
assert fetch_strategy.digest is None assert fetch_strategy.digest is None
assert fetch_strategy.extra_options == {} assert fetch_strategy.extra_options == {}
pkg.fetch_options = {'timeout': 60} pkg.fetch_options = {'timeout': 60}
fetch_strategy = fs.from_list_url(pkg) fetch_strategy = fs.from_list_url(pkg)
assert fetch_strategy.extra_options == {'timeout': 60} assert fetch_strategy.extra_options == {'timeout': 60}
def test_nosource_from_list_url(mock_packages, config): def test_nosource_from_list_url(mock_packages, config):
@ -176,6 +224,8 @@ def test_unknown_hash(checksum_type):
crypto.Checker('a') crypto.Checker('a')
@pytest.mark.skipif(which('curl') is None,
reason='Urllib does not have built-in status bar')
def test_url_with_status_bar(tmpdir, mock_archive, monkeypatch, capfd): def test_url_with_status_bar(tmpdir, mock_archive, monkeypatch, capfd):
"""Ensure fetch with status bar option succeeds.""" """Ensure fetch with status bar option succeeds."""
def is_true(): def is_true():
@ -185,26 +235,27 @@ def is_true():
monkeypatch.setattr(sys.stdout, 'isatty', is_true) monkeypatch.setattr(sys.stdout, 'isatty', is_true)
monkeypatch.setattr(tty, 'msg_enabled', is_true) monkeypatch.setattr(tty, 'msg_enabled', is_true)
with spack.config.override('config:use_curl', True):
fetcher = fs.URLFetchStrategy(mock_archive.url)
with Stage(fetcher, path=testpath) as stage:
assert fetcher.archive_file is None
stage.fetch()
fetcher = fs.URLFetchStrategy(mock_archive.url) status = capfd.readouterr()[1]
with Stage(fetcher, path=testpath) as stage: assert '##### 100' in status
assert fetcher.archive_file is None
stage.fetch()
status = capfd.readouterr()[1]
assert '##### 100' in status
def test_url_extra_fetch(tmpdir, mock_archive): @pytest.mark.parametrize('use_curl', [True, False])
def test_url_extra_fetch(tmpdir, mock_archive, use_curl):
"""Ensure a fetch after downloading is effectively a no-op.""" """Ensure a fetch after downloading is effectively a no-op."""
testpath = str(tmpdir) with spack.config.override('config:use_curl', use_curl):
testpath = str(tmpdir)
fetcher = fs.URLFetchStrategy(mock_archive.url) fetcher = fs.URLFetchStrategy(mock_archive.url)
with Stage(fetcher, path=testpath) as stage: with Stage(fetcher, path=testpath) as stage:
assert fetcher.archive_file is None assert fetcher.archive_file is None
stage.fetch() stage.fetch()
assert fetcher.archive_file is not None assert fetcher.archive_file is not None
fetcher.fetch() fetcher.fetch()
@pytest.mark.parametrize('url,urls,version,expected', [ @pytest.mark.parametrize('url,urls,version,expected', [
@ -215,17 +266,19 @@ def test_url_extra_fetch(tmpdir, mock_archive):
['https://ftpmirror.gnu.org/autoconf/autoconf-2.62.tar.gz', ['https://ftpmirror.gnu.org/autoconf/autoconf-2.62.tar.gz',
'https://ftp.gnu.org/gnu/autoconf/autoconf-2.62.tar.gz']) 'https://ftp.gnu.org/gnu/autoconf/autoconf-2.62.tar.gz'])
]) ])
def test_candidate_urls(pkg_factory, url, urls, version, expected): @pytest.mark.parametrize('use_curl', [True, False])
def test_candidate_urls(pkg_factory, url, urls, version, expected, use_curl):
"""Tests that candidate urls include mirrors and that they go through """Tests that candidate urls include mirrors and that they go through
pattern matching and substitution for versions. pattern matching and substitution for versions.
""" """
pkg = pkg_factory(url, urls) with spack.config.override('config:use_curl', use_curl):
f = fs._from_merged_attrs(fs.URLFetchStrategy, pkg, version) pkg = pkg_factory(url, urls)
assert f.candidate_urls == expected f = fs._from_merged_attrs(fs.URLFetchStrategy, pkg, version)
assert f.extra_options == {} assert f.candidate_urls == expected
pkg = pkg_factory(url, urls, fetch_options={'timeout': 60}) assert f.extra_options == {}
f = fs._from_merged_attrs(fs.URLFetchStrategy, pkg, version) pkg = pkg_factory(url, urls, fetch_options={'timeout': 60})
assert f.extra_options == {'timeout': 60} f = fs._from_merged_attrs(fs.URLFetchStrategy, pkg, version)
assert f.extra_options == {'timeout': 60}
@pytest.mark.regression('19673') @pytest.mark.regression('19673')
@ -244,11 +297,10 @@ def _which(*args, **kwargs):
testpath = str(tmpdir) testpath = str(tmpdir)
url = 'http://github.com/spack/spack' url = 'http://github.com/spack/spack'
fetcher = fs.URLFetchStrategy(url=url) with spack.config.override('config:use_curl', True):
assert fetcher is not None fetcher = fs.URLFetchStrategy(url=url)
assert fetcher is not None
with pytest.raises(TypeError, match='object is not callable'): with pytest.raises(TypeError, match='object is not callable'):
with Stage(fetcher, path=testpath) as stage: with Stage(fetcher, path=testpath) as stage:
out = stage.fetch() out = stage.fetch()
assert err_fmt.format('curl') in out
assert err_fmt.format('curl') in out

View file

@ -105,7 +105,8 @@ def read_from_url(url, accept_content_type=None):
else: else:
# User has explicitly indicated that they do not want SSL # User has explicitly indicated that they do not want SSL
# verification. # verification.
context = ssl._create_unverified_context() if not __UNABLE_TO_VERIFY_SSL:
context = ssl._create_unverified_context()
req = Request(url_util.format(url)) req = Request(url_util.format(url))
content_type = None content_type = None