Improve build isolation in PythonPipBuilder (#40224)

We run pip with `--no-build-isolation` because we don't wanna let pip
install build deps.

As a consequence, when pip runs hooks, it runs hooks of *any* package it
can find in `sys.path`.

For Spack-built Python this includes user site packages -- there
shouldn't be any system site packages. So in this case it suffices to
set the environment variable PYTHONNOUSERSITE=1.

For external Python, more needs to be done, cause there is no env
variable that disables both system and user site packages; setting the
`python -S` flag doesn't work because pip runs subprocesses that don't
inherit this flag (and there is no API to know if -S was passed)

So, for external Python, an empty venv is created before invoking pip in
Spack's build env ensures that pip can no longer see anything but
standard libraries and `PYTHONPATH`.

The downside of this is that pip will generate shebangs that point to
the python executable from the venv. So, for external python an extra
step is necessary where we fix up shebangs post install.
This commit is contained in:
Harmen Stoppels 2023-10-04 21:38:50 +02:00 committed by GitHub
parent d297098504
commit 0f43074f3e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -6,6 +6,7 @@
import os
import re
import shutil
import stat
from typing import Optional
import archspec
@ -25,6 +26,7 @@
from spack.directives import build_system, depends_on, extends, maintainers
from spack.error import NoHeadersError, NoLibrariesError, SpecError
from spack.install_test import test_part
from spack.util.executable import Executable
from spack.version import Version
from ._checks import BaseBuilder, execute_install_time_tests
@ -351,6 +353,51 @@ def libs(self):
raise NoLibrariesError(msg.format(self.spec.name, root))
def fixup_shebangs(path: str, old_interpreter: bytes, new_interpreter: bytes):
# Recurse into the install prefix and fixup shebangs
exe = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
dirs = [path]
hardlinks = set()
while dirs:
with os.scandir(dirs.pop()) as entries:
for entry in entries:
if entry.is_dir(follow_symlinks=False):
dirs.append(entry.path)
continue
# Only consider files, not symlinks
if not entry.is_file(follow_symlinks=False):
continue
lstat = entry.stat(follow_symlinks=False)
# Skip over files that are not executable
if not (lstat.st_mode & exe):
continue
# Don't modify hardlinks more than once
if lstat.st_nlink > 1:
key = (lstat.st_ino, lstat.st_dev)
if key in hardlinks:
continue
hardlinks.add(key)
# Finally replace shebangs if any.
with open(entry.path, "rb+") as f:
contents = f.read(2)
if contents != b"#!":
continue
contents += f.read()
if old_interpreter not in contents:
continue
f.seek(0)
f.write(contents.replace(old_interpreter, new_interpreter))
f.truncate()
@spack.builder.builder("python_pip")
class PythonPipBuilder(BaseBuilder):
phases = ("install",)
@ -447,8 +494,36 @@ def global_options(self, spec, prefix):
"""
return []
@property
def _build_venv_path(self):
"""Return the path to the virtual environment used for building when
python is external."""
return os.path.join(self.spec.package.stage.path, "build_env")
@property
def _build_venv_python(self) -> Executable:
"""Return the Python executable in the build virtual environment when
python is external."""
return Executable(os.path.join(self._build_venv_path, "bin", "python"))
def install(self, pkg, spec, prefix):
"""Install everything from build directory."""
python: Executable = spec["python"].command
# Since we invoke pip with --no-build-isolation, we have to make sure that pip cannot
# execute hooks from user and system site-packages.
if spec["python"].external:
# There are no environment variables to disable the system site-packages, so we use a
# virtual environment instead. The downside of this approach is that pip produces
# incorrect shebangs that refer to the virtual environment, which we have to fix up.
python("-m", "venv", "--without-pip", self._build_venv_path)
pip = self._build_venv_python
else:
# For a Spack managed Python, system site-packages is empty/unused by design, so it
# suffices to disable user site-packages, for which there is an environment variable.
pip = python
pip.add_default_env("PYTHONNOUSERSITE", "1")
pip.add_default_arg("-m")
pip.add_default_arg("pip")
args = PythonPipBuilder.std_args(pkg) + ["--prefix=" + prefix]
@ -472,8 +547,31 @@ def install(self, pkg, spec, prefix):
else:
args.append(".")
pip = inspect.getmodule(pkg).pip
with fs.working_dir(self.build_directory):
pip(*args)
@spack.builder.run_after("install")
def fixup_shebangs_pointing_to_build(self):
"""When installing a package using an external python, we use a temporary virtual
environment which improves build isolation. The downside is that pip produces shebangs
that point to the temporary virtual environment. This method fixes them up to point to the
underlying Python."""
# No need to fixup shebangs if no build venv was used. (this post install function also
# runs when install was overridden in another package, so check existence of the venv path)
if not os.path.exists(self._build_venv_path):
return
# Use sys.executable, since that's what pip uses.
interpreter = (
lambda python: python("-c", "import sys; print(sys.executable)", output=str)
.strip()
.encode("utf-8")
)
fixup_shebangs(
path=self.spec.prefix,
old_interpreter=interpreter(self._build_venv_python),
new_interpreter=interpreter(self.spec["python"].command),
)
spack.builder.run_after("install")(execute_install_time_tests)