Add handling of custom ssl certs in urllib ops (#42953)

This PR allows the user to specify a path to a custom cert file (or directory) in
Spack's config:

```yaml
  # This is where custom certs for proxy/firewall are stored.
  # It can be a path or environment variable. To match ssl env configuration
  # the default is the environment variable SSL_CERT_FILE
  ssl_certs: $SSL_CERT_FILE
```

`config:ssl_certs` can be a path to a file or a directory, or it can be and environment
variable that resolves to one of those. When it posts to something valid, Spack will
update the ssl context to include custom certs, and fetching via `urllib` and `curl`
will trust the provided certs.

This should resolve many issues with fetching behind corporate firewalls.


---------

Co-authored-by: psakievich <psakievich@users.noreply.github.com>
Co-authored-by: Alec Scott <alec@bcs.sh>
This commit is contained in:
psakievich 2024-04-01 12:11:13 -06:00 committed by GitHub
parent b81d7d0aac
commit 7afa949da1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 169 additions and 4 deletions

View file

@ -101,6 +101,12 @@ config:
verify_ssl: true verify_ssl: true
# This is where custom certs for proxy/firewall are stored.
# It can be a path or environment variable. To match ssl env configuration
# the default is the environment variable SSL_CERT_FILE
ssl_certs: $SSL_CERT_FILE
# Suppress gpg warnings from binary package verification # Suppress gpg warnings from binary package verification
# Only suppresses warnings, gpg failure will still fail the install # Only suppresses warnings, gpg failure will still fail the install
# Potential rationale to set True: users have already explicitly trusted the # Potential rationale to set True: users have already explicitly trusted the

View file

@ -145,6 +145,22 @@ hosts when making ``ssl`` connections. Set to ``false`` to disable, and
tools like ``curl`` will use their ``--insecure`` options. Disabling tools like ``curl`` will use their ``--insecure`` options. Disabling
this can expose you to attacks. Use at your own risk. this can expose you to attacks. Use at your own risk.
--------------------
``ssl_certs``
--------------------
Path to custom certificats for SSL verification. The value can be a
filesytem path, or an environment variable that expands to a file path.
The default value is set to the environment variable ``SSL_CERT_FILE``
to use the same syntax used by many other applications that automatically
detect custom certificates.
When ``url_fetch_method:curl`` the ``config:ssl_certs`` should resolve to
a single file. Spack will then set the environment variable ``CURL_CA_BUNDLE``
in the subprocess calling ``curl``.
If ``url_fetch_method:urllib`` then files and directories are supported i.e.
``config:ssl_certs:$SSL_CERT_FILE`` or ``config:ssl_certs:$SSL_CERT_DIR``
will work.
-------------------- --------------------
``checksum`` ``checksum``
-------------------- --------------------

View file

@ -14,7 +14,7 @@
import xml.sax.saxutils import xml.sax.saxutils
from typing import Dict, Optional from typing import Dict, Optional
from urllib.parse import urlencode from urllib.parse import urlencode
from urllib.request import HTTPHandler, Request, build_opener from urllib.request import HTTPSHandler, Request, build_opener
import llnl.util.tty as tty import llnl.util.tty as tty
from llnl.util.filesystem import working_dir from llnl.util.filesystem import working_dir
@ -27,6 +27,7 @@
from spack.error import SpackError from spack.error import SpackError
from spack.util.crypto import checksum from spack.util.crypto import checksum
from spack.util.log_parse import parse_log_events from spack.util.log_parse import parse_log_events
from spack.util.web import urllib_ssl_cert_handler
from .base import Reporter from .base import Reporter
from .extract import extract_test_parts from .extract import extract_test_parts
@ -427,7 +428,7 @@ def upload(self, filename):
# Compute md5 checksum for the contents of this file. # Compute md5 checksum for the contents of this file.
md5sum = checksum(hashlib.md5, filename, block_size=8192) md5sum = checksum(hashlib.md5, filename, block_size=8192)
opener = build_opener(HTTPHandler) opener = build_opener(HTTPSHandler(context=urllib_ssl_cert_handler()))
with open(filename, "rb") as f: with open(filename, "rb") as f:
params_dict = { params_dict = {
"build": self.buildname, "build": self.buildname,

View file

@ -73,6 +73,7 @@
"environments_root": {"type": "string"}, "environments_root": {"type": "string"},
"connect_timeout": {"type": "integer", "minimum": 0}, "connect_timeout": {"type": "integer", "minimum": 0},
"verify_ssl": {"type": "boolean"}, "verify_ssl": {"type": "boolean"},
"ssl_certs": {"type": "string"},
"suppress_gpg_warnings": {"type": "boolean"}, "suppress_gpg_warnings": {"type": "boolean"},
"install_missing_compilers": {"type": "boolean"}, "install_missing_compilers": {"type": "boolean"},
"debug": {"type": "boolean"}, "debug": {"type": "boolean"},

View file

@ -10,6 +10,7 @@ config:
source_cache: $user_cache_path/source source_cache: $user_cache_path/source
misc_cache: $user_cache_path/cache misc_cache: $user_cache_path/cache
verify_ssl: true verify_ssl: true
ssl_certs: $SSL_CERT_FILE
checksum: true checksum: true
dirty: false dirty: false
concretizer: {0} concretizer: {0}

View file

@ -6,6 +6,7 @@
import email.message import email.message
import os import os
import pickle import pickle
import ssl
import urllib.request import urllib.request
import pytest import pytest
@ -363,3 +364,81 @@ def test_detailed_http_error_pickle(tmpdir):
assert deserialized.reason == "Not Found" assert deserialized.reason == "Not Found"
assert str(deserialized.info()) == str(headers) assert str(deserialized.info()) == str(headers)
assert str(deserialized) == str(error) assert str(deserialized) == str(error)
@pytest.fixture()
def ssl_scrubbed_env(mutable_config, monkeypatch):
"""clear out environment variables that could give false positives for SSL Cert tests"""
monkeypatch.delenv("SSL_CERT_FILE", raising=False)
monkeypatch.delenv("SSL_CERT_DIR", raising=False)
monkeypatch.delenv("CURL_CA_BUNDLE", raising=False)
spack.config.set("config:verify_ssl", True)
@pytest.mark.parametrize(
"cert_path,cert_creator",
[
pytest.param(
lambda base_path: os.path.join(base_path, "mock_cert.crt"),
lambda cert_path: open(cert_path, "w").close(),
id="cert_file",
),
pytest.param(
lambda base_path: os.path.join(base_path, "mock_cert"),
lambda cert_path: os.mkdir(cert_path),
id="cert_directory",
),
],
)
def test_ssl_urllib(
cert_path, cert_creator, tmpdir, ssl_scrubbed_env, mutable_config, monkeypatch
):
"""
create a proposed cert type and then verify that they exist inside ssl's checks
"""
spack.config.set("config:url_fetch_method", "urllib")
def mock_verify_locations(self, cafile, capath, cadata):
"""overwrite ssl's verification to simply check for valid file/path"""
assert cafile or capath
if cafile:
assert os.path.isfile(cafile)
if capath:
assert os.path.isdir(capath)
monkeypatch.setattr(ssl.SSLContext, "load_verify_locations", mock_verify_locations)
with tmpdir.as_cwd():
mock_cert = cert_path(tmpdir.strpath)
cert_creator(mock_cert)
spack.config.set("config:ssl_certs", mock_cert)
assert mock_cert == spack.config.get("config:ssl_certs", None)
ssl_context = spack.util.web.urllib_ssl_cert_handler()
assert ssl_context.verify_mode == ssl.CERT_REQUIRED
@pytest.mark.parametrize("cert_exists", [True, False], ids=["exists", "missing"])
def test_ssl_curl_cert_file(cert_exists, tmpdir, ssl_scrubbed_env, mutable_config, monkeypatch):
"""
Assure that if a valid cert file is specified curl executes
with CURL_CA_BUNDLE in the env
"""
spack.config.set("config:url_fetch_method", "curl")
with tmpdir.as_cwd():
mock_cert = str(tmpdir.join("mock_cert.crt"))
spack.config.set("config:ssl_certs", mock_cert)
if cert_exists:
open(mock_cert, "w").close()
assert os.path.isfile(mock_cert)
curl = spack.util.web._curl()
# arbitrary call to query the run env
dump_env = {}
curl("--help", output=str, _dump_env=dump_env)
if cert_exists:
assert dump_env["CURL_CA_BUNDLE"] == mock_cert
else:
assert "CURL_CA_BUNDLE" not in dump_env

View file

@ -27,6 +27,7 @@
import spack.config import spack.config
import spack.error import spack.error
import spack.util.path
import spack.util.url as url_util import spack.util.url as url_util
from .executable import CommandNotFoundError, which from .executable import CommandNotFoundError, which
@ -59,6 +60,66 @@ def http_error_default(self, req, fp, code, msg, hdrs):
raise DetailedHTTPError(req, code, msg, hdrs, fp) raise DetailedHTTPError(req, code, msg, hdrs, fp)
dbg_msg_no_ssl_cert_config = (
"config:ssl_certs not in configuration. "
"Default cert configuation and environment will be used."
)
def urllib_ssl_cert_handler():
"""context for configuring ssl during urllib HTTPS operations"""
custom_cert_var = spack.config.get("config:ssl_certs")
if custom_cert_var:
# custom certs will be a location, so expand env variables, paths etc
certs = spack.util.path.canonicalize_path(custom_cert_var)
tty.debug("URLLIB: Looking for custom SSL certs at {}".format(certs))
if os.path.isfile(certs):
tty.debug("URLLIB: Custom SSL certs file found at {}".format(certs))
return ssl.create_default_context(cafile=certs)
elif os.path.isdir(certs):
tty.debug("URLLIB: Custom SSL certs directory found at {}".format(certs))
return ssl.create_default_context(capath=certs)
else:
tty.debug("URLLIB: Custom SSL certs not found")
return ssl.create_default_context()
else:
tty.debug(dbg_msg_no_ssl_cert_config)
return ssl.create_default_context()
# curl requires different strategies for custom certs at runtime depending on if certs
# are stored as a file or a directory
def append_curl_env_for_ssl_certs(curl):
"""
configure curl to use custom certs in a file at run time
see: https://curl.se/docs/sslcerts.html item 4
"""
custom_cert_var = spack.config.get("config:ssl_certs")
if custom_cert_var:
# custom certs will be a location, so expand env variables, paths etc
certs = spack.util.path.canonicalize_path(custom_cert_var)
tty.debug("CURL: Looking for custom SSL certs file at {}".format(certs))
if os.path.isfile(certs):
tty.debug(
"CURL: Configuring curl to use custom"
" certs from {} by setting "
"CURL_CA_BUNDLE".format(certs)
)
curl.add_default_env("CURL_CA_BUNDLE", certs)
elif os.path.isdir(certs):
tty.warn(
"CURL config:ssl_certs"
" is a directory but cURL only supports files. Default certs will be used instead."
)
else:
tty.debug(
"CURL config:ssl_certs "
"resolves to {}. This is not a file so default certs will be used.".format(certs)
)
tty.debug(dbg_msg_no_ssl_cert_config)
return curl
def _urlopen(): def _urlopen():
s3 = UrllibS3Handler() s3 = UrllibS3Handler()
gcs = GCSHandler() gcs = GCSHandler()
@ -66,7 +127,7 @@ def _urlopen():
# One opener with HTTPS ssl enabled # One opener with HTTPS ssl enabled
with_ssl = build_opener( with_ssl = build_opener(
s3, gcs, HTTPSHandler(context=ssl.create_default_context()), error_handler s3, gcs, HTTPSHandler(context=urllib_ssl_cert_handler()), error_handler
) )
# One opener with HTTPS ssl disabled # One opener with HTTPS ssl disabled
@ -287,7 +348,7 @@ def _curl(curl=None):
except CommandNotFoundError as exc: except CommandNotFoundError as exc:
tty.error(str(exc)) tty.error(str(exc))
raise spack.error.FetchError("Missing required curl fetch method") raise spack.error.FetchError("Missing required curl fetch method")
return curl return append_curl_env_for_ssl_certs(curl)
def fetch_url_text(url, curl=None, dest_dir="."): def fetch_url_text(url, curl=None, dest_dir="."):