Cleanup of binary text relocation (#34188)

Move the relocation of binary text into its own class.

Drop threaded text replacement, since the current bottleneck
is decompression. It would be better to parallelize over packages
instead of over files per package.

A small improvement of having separate classes for text replacement is that
the regex is now compiled once in the constructor; previously it was compiled
for every binary to be relocated.
Harmen Stoppels 2023-01-26 12:18:53 +01:00 committed by GitHub
parent eeba92e788
commit 6847d73504
8 changed files with 561 additions and 484 deletions
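
For context before the diffs, here is a minimal usage sketch of the refactored API, with hypothetical prefixes and file lists (the class and wrapper names are the ones introduced in the diff below):

from collections import OrderedDict

import spack.relocate
from spack.relocate_text import BinaryFilePrefixReplacer, TextFilePrefixReplacer

# Hypothetical mapping and file lists, for illustration only.
prefix_to_prefix = OrderedDict([("/padded/old/prefix", "/new/prefix")])
text_files = ["/new/prefix/bin/script.sh"]
binaries = ["/new/prefix/lib/libfoo.so"]

# Each replacer compiles its regex once in the constructor and then
# applies it to every file in the list.
TextFilePrefixReplacer.from_strings_or_bytes(prefix_to_prefix).apply(text_files)
BinaryFilePrefixReplacer.from_strings_or_bytes(prefix_to_prefix).apply(binaries)

# Thin wrappers in spack.relocate keep existing call sites working.
spack.relocate.relocate_text(text_files, prefix_to_prefix)
spack.relocate.relocate_text_bin(binaries, prefix_to_prefix)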


@@ -47,7 +47,7 @@
import spack.util.url as url_util
import spack.util.web as web_util
from spack.caches import misc_cache_location
from spack.relocate import utf8_paths_to_single_binary_regex
from spack.relocate_text import utf8_paths_to_single_binary_regex
from spack.spec import Spec
from spack.stage import Stage
from spack.util.executable import which
@@ -1730,16 +1730,16 @@ def is_backup_file(file):
# For all buildcaches
# relocate the install prefixes in text files including dependencies
relocate.unsafe_relocate_text(text_names, prefix_to_prefix_text)
relocate.relocate_text(text_names, prefix_to_prefix_text)
# relocate the install prefixes in binary files including dependencies
relocate.unsafe_relocate_text_bin(files_to_relocate, prefix_to_prefix_bin)
relocate.relocate_text_bin(files_to_relocate, prefix_to_prefix_bin)
# If we are installing back to the same location
# relocate the sbang location if the spack directory changed
else:
if old_spack_prefix != new_spack_prefix:
relocate.unsafe_relocate_text(text_names, prefix_to_prefix_text)
relocate.relocate_text(text_names, prefix_to_prefix_text)
def _extract_inner_tarball(spec, filename, extract_to, unsigned, remote_checksum):


@@ -90,11 +90,11 @@ def view_copy(src, dst, view, spec=None):
prefix_to_projection[dep.prefix] = view.get_projection_for_spec(dep)
if spack.relocate.is_binary(dst):
spack.relocate.unsafe_relocate_text_bin(binaries=[dst], prefixes=prefix_to_projection)
spack.relocate.relocate_text_bin(binaries=[dst], prefixes=prefix_to_projection)
else:
prefix_to_projection[spack.store.layout.root] = view._root
prefix_to_projection[orig_sbang] = new_sbang
spack.relocate.unsafe_relocate_text(files=[dst], prefixes=prefix_to_projection)
spack.relocate.relocate_text(files=[dst], prefixes=prefix_to_projection)
try:
stat = os.stat(src)
os.chown(dst, stat.st_uid, stat.st_gid)


@@ -4,7 +4,6 @@
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import collections
import itertools
import multiprocessing.pool
import os
import re
import shutil
@@ -27,6 +26,8 @@
import spack.util.elf as elf
import spack.util.executable as executable
from .relocate_text import BinaryFilePrefixReplacer, TextFilePrefixReplacer
is_macos = str(spack.platforms.real_host()) == "darwin"
@@ -46,49 +47,6 @@ def __init__(self, file_path, root_path):
)
class BinaryStringReplacementError(spack.error.SpackError):
def __init__(self, file_path, old_len, new_len):
"""The size of the file changed after binary path substitution
Args:
file_path (str): file with changing size
old_len (str): original length of the file
new_len (str): length of the file after substitution
"""
super(BinaryStringReplacementError, self).__init__(
"Doing a binary string replacement in %s failed.\n"
"The size of the file changed from %s to %s\n"
"when it should have remanined the same." % (file_path, old_len, new_len)
)
class BinaryTextReplaceError(spack.error.SpackError):
def __init__(self, msg):
msg += (
" To fix this, compile with more padding "
"(config:install_tree:padded_length), or install to a shorter prefix."
)
super(BinaryTextReplaceError, self).__init__(msg)
class CannotGrowString(BinaryTextReplaceError):
def __init__(self, old, new):
msg = "Cannot replace {!r} with {!r} because the new prefix is longer.".format(old, new)
super(CannotGrowString, self).__init__(msg)
class CannotShrinkCString(BinaryTextReplaceError):
def __init__(self, old, new, full_old_string):
# Just interpolate binary string to not risk issues with invalid
# unicode, which would be really bad user experience: error in error.
# We have no clue if we actually deal with a real C-string nor what
# encoding it has.
msg = "Cannot replace {!r} with {!r} in the C-string {!r}.".format(
old, new, full_old_string
)
super(CannotShrinkCString, self).__init__(msg)
@memoized
def _patchelf():
"""Return the full path to the patchelf binary, if available, else None."""
@@ -450,108 +408,6 @@ def needs_text_relocation(m_type, m_subtype):
return m_type == "text"
def apply_binary_replacements(f, prefix_to_prefix, suffix_safety_size=7):
"""
Given a file opened in rb+ mode, apply the string replacements as
specified by an ordered dictionary of prefix to prefix mappings. This
method takes special care of null-terminated C-strings. C-string constants
are problematic because compilers and linkers optimize readonly strings for
space by aliasing those that share a common suffix (only suffix since all
of them are null terminated). See https://github.com/spack/spack/pull/31739
and https://github.com/spack/spack/pull/32253 for details. Our logic matches
the original prefix with a ``suffix_safety_size + 1`` lookahead for null bytes.
If no null terminator is found, we simply pad with leading /, assuming that
it's a long C-string; the full C-string after replacement has a large suffix
in common with its original value.
If there *is* a null terminator we can do the same as long as the replacement
has a sufficiently long common suffix with the original prefix.
As a last resort when the replacement does not have a long enough common suffix,
we can try to shorten the string, but this only works if the new length is
sufficiently short (typically the case when going from large padding -> normal path)
If the replacement string is longer, or all of the above fails, we error out.
Arguments:
f: file opened in rb+ mode
prefix_to_prefix (OrderedDict): OrderedDictionary where the keys are
bytes representing the old prefixes and the values are the new
suffix_safety_size (int): in case of null terminated strings, what size
of the suffix should remain to avoid aliasing issues?
"""
assert suffix_safety_size >= 0
assert f.tell() == 0
# Look for exact matches of our paths, and also look if there's a null terminator
# soon after (this covers the case where we search for /abc but match /abc/ with
# a trailing dir separator).
regex = re.compile(
b"("
+ b"|".join(re.escape(p) for p in prefix_to_prefix.keys())
+ b")([^\0]{0,%d}\0)?" % suffix_safety_size
)
# We *could* read binary data in chunks to avoid loading all in memory,
# but it's nasty to deal with matches across boundaries, so let's stick to
# something simple.
for match in regex.finditer(f.read()):
# The matching prefix (old) and its replacement (new)
old = match.group(1)
new = prefix_to_prefix[old]
# Did we find a trailing null within a N + 1 bytes window after the prefix?
null_terminated = match.end(0) > match.end(1)
# Suffix string length, excluding the null byte
# Only makes sense if null_terminated
suffix_strlen = match.end(0) - match.end(1) - 1
# How many bytes are we shrinking our string?
bytes_shorter = len(old) - len(new)
# We can't make strings larger.
if bytes_shorter < 0:
raise CannotGrowString(old, new)
# If we don't know whether this is a null terminated C-string (we're looking
# only N + 1 bytes ahead), or if it is and we have a common suffix, we can
# simply pad with leading dir separators.
elif (
not null_terminated
or suffix_strlen >= suffix_safety_size # == is enough, but let's be defensive
or old[-suffix_safety_size + suffix_strlen :]
== new[-suffix_safety_size + suffix_strlen :]
):
replacement = b"/" * bytes_shorter + new
# If it *was* null terminated, all that matters is that we can leave N bytes
# of old suffix in place. Note that > is required since we also insert an
# additional null terminator.
elif bytes_shorter > suffix_safety_size:
replacement = new + match.group(2) # includes the trailing null
# Otherwise... we can't :(
else:
raise CannotShrinkCString(old, new, match.group()[:-1])
f.seek(match.start())
f.write(replacement)
def _replace_prefix_bin(filename, prefix_to_prefix):
"""Replace all the occurrences of the old prefix with a new prefix in binary
files. See :func:`~spack.relocate.apply_binary_replacements` for details.
Args:
filename (str): target binary file
byte_prefixes (OrderedDict): ordered dictionary where the keys are
bytes representing the old prefixes and the values are the new
prefixes (all bytes utf-8 encoded)
"""
with open(filename, "rb+") as f:
apply_binary_replacements(f, prefix_to_prefix)
def relocate_macho_binaries(
path_names,
old_layout_root,
@@ -800,120 +656,32 @@ def relocate_links(links, prefix_to_prefix):
symlink(new_target, link)
def utf8_path_to_binary_regex(prefix):
"""Create a (binary) regex that matches the input path in utf8"""
prefix_bytes = re.escape(prefix).encode("utf-8")
return re.compile(b"(?<![\\w\\-_/])([\\w\\-_]*?)%s([\\w\\-_/]*)" % prefix_bytes)
def byte_strings_to_single_binary_regex(prefixes):
all_prefixes = b"|".join(re.escape(p) for p in prefixes)
return re.compile(b"(?<![\\w\\-_/])([\\w\\-_]*?)(%s)([\\w\\-_/]*)" % all_prefixes)
def utf8_paths_to_single_binary_regex(prefixes):
"""Create a (binary) regex that matches any input path in utf8"""
return byte_strings_to_single_binary_regex(p.encode("utf-8") for p in prefixes)
def _replace_prefix_text_file(file, regex, prefix_to_prefix):
"""Given a text file opened in rb+, substitute all old with new prefixes and write
in-place (file size may grow or shrink)."""
def replacement(match):
return match.group(1) + prefix_to_prefix[match.group(2)] + match.group(3)
data = file.read()
file.seek(0)
file.write(re.sub(regex, replacement, data))
file.truncate()
def _replace_prefix_text(filename, regex, prefix_to_prefix):
with open(filename, "rb+") as f:
_replace_prefix_text_file(f, regex, prefix_to_prefix)
def unsafe_relocate_text(files, prefixes, concurrency=32):
def relocate_text(files, prefixes):
"""Relocate text file from the original installation prefix to the
new prefix.
Relocation also affects the path in Spack's sbang script.
Note: unsafe when files contains duplicates, such as repeated paths,
symlinks, hardlinks.
Args:
files (list): Text files to be relocated
prefixes (OrderedDict): String prefixes which need to be changed
concurrency (int): Preferred degree of parallelism
"""
# This now needs to be handled by the caller in all cases
# orig_sbang = '#!/bin/bash {0}/bin/sbang'.format(orig_spack)
# new_sbang = '#!/bin/bash {0}/bin/sbang'.format(new_spack)
# Transform to binary string
prefix_to_prefix = OrderedDict(
(k.encode("utf-8"), v.encode("utf-8")) for (k, v) in prefixes.items()
)
# Create a regex of the form (pre check)(prefix 1|prefix 2|prefix 3)(post check).
regex = byte_strings_to_single_binary_regex(prefix_to_prefix.keys())
args = [(filename, regex, prefix_to_prefix) for filename in files]
tp = multiprocessing.pool.ThreadPool(processes=concurrency)
try:
tp.map(llnl.util.lang.star(_replace_prefix_text), args)
finally:
tp.terminate()
tp.join()
TextFilePrefixReplacer.from_strings_or_bytes(prefixes).apply(files)
def unsafe_relocate_text_bin(binaries, prefixes, concurrency=32):
"""Replace null terminated path strings hard coded into binaries.
def relocate_text_bin(binaries, prefixes):
"""Replace null terminated path strings hard-coded into binaries.
The new install prefix must be shorter than the original one.
Note: unsafe when files contains duplicates, such as repeated paths,
symlinks, hardlinks.
Args:
binaries (list): binaries to be relocated
prefixes (OrderedDict): String prefixes which need to be changed.
concurrency (int): Desired degree of parallelism.
Raises:
BinaryTextReplaceError: when the new path is longer than the old path
spack.relocate_text.BinaryTextReplaceError: when the new path is longer than the old path
"""
byte_prefixes = collections.OrderedDict({})
for orig_prefix, new_prefix in prefixes.items():
if orig_prefix != new_prefix:
if isinstance(orig_prefix, bytes):
orig_bytes = orig_prefix
else:
orig_bytes = orig_prefix.encode("utf-8")
if isinstance(new_prefix, bytes):
new_bytes = new_prefix
else:
new_bytes = new_prefix.encode("utf-8")
byte_prefixes[orig_bytes] = new_bytes
# Do relocations on text in binaries that refers to the install tree
# multiprocessing.ThreadPool.map requires single argument
args = []
for binary in binaries:
args.append((binary, byte_prefixes))
tp = multiprocessing.pool.ThreadPool(processes=concurrency)
try:
tp.map(llnl.util.lang.star(_replace_prefix_bin), args)
finally:
tp.terminate()
tp.join()
BinaryFilePrefixReplacer.from_strings_or_bytes(prefixes).apply(binaries)
def is_relocatable(spec):


@@ -0,0 +1,288 @@
# Copyright 2013-2022 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
"""This module contains pure-Python classes and functions for replacing
paths inside text files and binaries."""
import re
from collections import OrderedDict
from typing import Dict, Union
import spack.error
Prefix = Union[str, bytes]
def encode_path(p: Prefix) -> bytes:
return p if isinstance(p, bytes) else p.encode("utf-8")
def _prefix_to_prefix_as_bytes(prefix_to_prefix) -> Dict[bytes, bytes]:
return OrderedDict((encode_path(k), encode_path(v)) for (k, v) in prefix_to_prefix.items())
def utf8_path_to_binary_regex(prefix: str):
"""Create a binary regex that matches the input path in utf8"""
prefix_bytes = re.escape(prefix).encode("utf-8")
return re.compile(b"(?<![\\w\\-_/])([\\w\\-_]*?)%s([\\w\\-_/]*)" % prefix_bytes)
def _byte_strings_to_single_binary_regex(prefixes):
all_prefixes = b"|".join(re.escape(p) for p in prefixes)
return re.compile(b"(?<![\\w\\-_/])([\\w\\-_]*?)(%s)([\\w\\-_/]*)" % all_prefixes)
def utf8_paths_to_single_binary_regex(prefixes):
"""Create a (binary) regex that matches any input path in utf8"""
return _byte_strings_to_single_binary_regex(p.encode("utf-8") for p in prefixes)
def filter_identity_mappings(prefix_to_prefix):
"""Drop mappings that are not changed."""
# NOTE: we don't guard against the following case:
# [/abc/def -> /abc/def, /abc -> /x] *will* be simplified to
# [/abc -> /x], meaning that after this simplification /abc/def will be
# mapped to /x/def instead of /abc/def. This should not be a problem.
return OrderedDict((k, v) for (k, v) in prefix_to_prefix.items() if k != v)
class PrefixReplacer:
"""Base class for applying a prefix to prefix map
to a list of binaries or text files.
Child classes implement _apply_to_file to do the
actual work, which is different when it comes to
binaries and text files."""
def __init__(self, prefix_to_prefix: Dict[bytes, bytes]):
"""
Arguments:
prefix_to_prefix (OrderedDict):
An ordered mapping from prefix to prefix. The order is
relevant to support substring fallbacks, for example
[("/first/sub", "/x"), ("/first", "/y")] will ensure
/first/sub is matched and replaced before /first.
"""
self.prefix_to_prefix = filter_identity_mappings(prefix_to_prefix)
@property
def is_noop(self) -> bool:
"""Returns true when the prefix to prefix map
is mapping everything to the same location (identity)
or there are no prefixes to replace."""
return not bool(self.prefix_to_prefix)
def apply(self, filenames: list):
if self.is_noop:
return
for filename in filenames:
self.apply_to_filename(filename)
def apply_to_filename(self, filename):
if self.is_noop:
return
with open(filename, "rb+") as f:
self.apply_to_file(f)
def apply_to_file(self, f):
if self.is_noop:
return
self._apply_to_file(f)
class TextFilePrefixReplacer(PrefixReplacer):
"""This class applies prefix to prefix mappings for relocation
on text files.
Note that UTF-8 encoding is assumed."""
def __init__(self, prefix_to_prefix: Dict[bytes, bytes]):
"""
prefix_to_prefix (OrderedDict): OrderedDict where the keys are
bytes representing the old prefixes and the values are the new prefixes.
"""
super().__init__(prefix_to_prefix)
# Single regex for all paths.
self.regex = _byte_strings_to_single_binary_regex(self.prefix_to_prefix.keys())
@classmethod
def from_strings_or_bytes(
cls, prefix_to_prefix: Dict[Prefix, Prefix]
) -> "TextFilePrefixReplacer":
"""Create a TextFilePrefixReplacer from an ordered prefix to prefix map."""
return cls(_prefix_to_prefix_as_bytes(prefix_to_prefix))
def _apply_to_file(self, f):
"""Text replacement implementation simply reads the entire file
in memory and applies the combined regex."""
replacement = lambda m: m.group(1) + self.prefix_to_prefix[m.group(2)] + m.group(3)
data = f.read()
new_data = re.sub(self.regex, replacement, data)
if id(data) == id(new_data):
return
f.seek(0)
f.write(new_data)
f.truncate()
class BinaryFilePrefixReplacer(PrefixReplacer):
def __init__(self, prefix_to_prefix, suffix_safety_size=7):
"""
prefix_to_prefix (OrderedDict): OrderedDict where the keys are
bytes representing the old prefixes and the values are the new prefixes
suffix_safety_size (int): in case of null terminated strings, what size
of the suffix should remain to avoid aliasing issues?
"""
assert suffix_safety_size >= 0
super().__init__(prefix_to_prefix)
self.suffix_safety_size = suffix_safety_size
self.regex = self.binary_text_regex(self.prefix_to_prefix.keys(), suffix_safety_size)
@classmethod
def binary_text_regex(cls, binary_prefixes, suffix_safety_size=7):
"""
Create a regex that looks for exact matches of prefixes, and also tries to
match a C-string type null terminator in a small lookahead window.
Arguments:
binary_prefixes (list): List of byte strings of prefixes to match
suffix_safety_size (int): Size of the lookahead for the null-terminated string.
Returns: compiled regex
"""
return re.compile(
b"("
+ b"|".join(re.escape(p) for p in binary_prefixes)
+ b")([^\0]{0,%d}\0)?" % suffix_safety_size
)
@classmethod
def from_strings_or_bytes(
cls, prefix_to_prefix: Dict[Prefix, Prefix], suffix_safety_size: int = 7
) -> "BinaryFilePrefixReplacer":
"""Create a BinaryFilePrefixReplacer from an ordered prefix to prefix map.
Arguments:
prefix_to_prefix (OrderedDict): Ordered mapping of prefix to prefix.
suffix_safety_size (int): Number of bytes to retain at the end of a C-string
to avoid binary string-aliasing issues.
"""
return cls(_prefix_to_prefix_as_bytes(prefix_to_prefix), suffix_safety_size)
def _apply_to_file(self, f):
"""
Given a file opened in rb+ mode, apply the string replacements as
specified by an ordered dictionary of prefix to prefix mappings. This
method takes special care of null-terminated C-strings. C-string constants
are problematic because compilers and linkers optimize readonly strings for
space by aliasing those that share a common suffix (only suffix since all
of them are null terminated). See https://github.com/spack/spack/pull/31739
and https://github.com/spack/spack/pull/32253 for details. Our logic matches
the original prefix with a ``suffix_safety_size + 1`` lookahead for null bytes.
If no null terminator is found, we simply pad with leading /, assuming that
it's a long C-string; the full C-string after replacement has a large suffix
in common with its original value.
If there *is* a null terminator we can do the same as long as the replacement
has a sufficiently long common suffix with the original prefix.
As a last resort when the replacement does not have a long enough common suffix,
we can try to shorten the string, but this only works if the new length is
sufficiently short (typically the case when going from large padding -> normal path)
If the replacement string is longer, or all of the above fails, we error out.
Arguments:
f: file opened in rb+ mode
"""
assert f.tell() == 0
# We *could* read binary data in chunks to avoid loading all in memory,
# but it's nasty to deal with matches across boundaries, so let's stick to
# something simple.
for match in self.regex.finditer(f.read()):
# The matching prefix (old) and its replacement (new)
old = match.group(1)
new = self.prefix_to_prefix[old]
# Did we find a trailing null within a N + 1 bytes window after the prefix?
null_terminated = match.end(0) > match.end(1)
# Suffix string length, excluding the null byte
# Only makes sense if null_terminated
suffix_strlen = match.end(0) - match.end(1) - 1
# How many bytes are we shrinking our string?
bytes_shorter = len(old) - len(new)
# We can't make strings larger.
if bytes_shorter < 0:
raise CannotGrowString(old, new)
# If we don't know whether this is a null terminated C-string (we're looking
# only N + 1 bytes ahead), or if it is and we have a common suffix, we can
# simply pad with leading dir separators.
elif (
not null_terminated
or suffix_strlen >= self.suffix_safety_size # == is enough, but let's be defensive
or old[-self.suffix_safety_size + suffix_strlen :]
== new[-self.suffix_safety_size + suffix_strlen :]
):
replacement = b"/" * bytes_shorter + new
# If it *was* null terminated, all that matters is that we can leave N bytes
# of old suffix in place. Note that > is required since we also insert an
# additional null terminator.
elif bytes_shorter > self.suffix_safety_size:
replacement = new + match.group(2) # includes the trailing null
# Otherwise... we can't :(
else:
raise CannotShrinkCString(old, new, match.group()[:-1])
f.seek(match.start())
f.write(replacement)
class BinaryStringReplacementError(spack.error.SpackError):
def __init__(self, file_path, old_len, new_len):
"""The size of the file changed after binary path substitution
Args:
file_path (str): file with changing size
old_len (str): original length of the file
new_len (str): length of the file after substitution
"""
super(BinaryStringReplacementError, self).__init__(
"Doing a binary string replacement in %s failed.\n"
"The size of the file changed from %s to %s\n"
"when it should have remanined the same." % (file_path, old_len, new_len)
)
class BinaryTextReplaceError(spack.error.SpackError):
def __init__(self, msg):
msg += (
" To fix this, compile with more padding "
"(config:install_tree:padded_length), or install to a shorter prefix."
)
super(BinaryTextReplaceError, self).__init__(msg)
class CannotGrowString(BinaryTextReplaceError):
def __init__(self, old, new):
msg = "Cannot replace {!r} with {!r} because the new prefix is longer.".format(old, new)
super(CannotGrowString, self).__init__(msg)
class CannotShrinkCString(BinaryTextReplaceError):
def __init__(self, old, new, full_old_string):
# Just interpolate binary string to not risk issues with invalid
# unicode, which would be really bad user experience: error in error.
# We have no clue if we actually deal with a real C-string nor what
# encoding it has.
msg = "Cannot replace {!r} with {!r} in the C-string {!r}.".format(
old, new, full_old_string
)
super(CannotShrinkCString, self).__init__(msg)
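
To make the suffix-safety logic concrete, here is a small worked example in the spirit of the tests further below, using a hypothetical in-memory buffer. When the new prefix is shorter and shares a long enough suffix with the old one, the replacement is padded with leading slashes so the file size, and the position of the trailing data, is preserved:

import io
from collections import OrderedDict

from spack.relocate_text import BinaryFilePrefixReplacer

# Hypothetical prefixes; old and new end in the same "pkg" suffix.
replacer = BinaryFilePrefixReplacer(
    OrderedDict([(b"/old-spack/opt/pkg", b"/new/pkg")]), suffix_safety_size=7
)

buf = io.BytesIO(b"path: /old-spack/opt/pkg/lib\0")
replacer.apply_to_file(buf)

# The 18-byte old prefix is replaced by 10 padding slashes plus the 8-byte
# new prefix, so the null-terminated string keeps its original length.
assert buf.getvalue() == b"path: " + b"/" * 10 + b"/new/pkg/lib\0"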


@@ -70,7 +70,7 @@ def rewire_node(spec, explicit):
for rel_path in manifest.get("text_to_relocate", [])
]
if text_to_relocate:
relocate.unsafe_relocate_text(files=text_to_relocate, prefixes=prefix_to_prefix)
relocate.relocate_text(files=text_to_relocate, prefixes=prefix_to_prefix)
bins_to_relocate = [
os.path.join(tempdir, spec.dag_hash(), rel_path)
@@ -97,7 +97,7 @@ def rewire_node(spec, explicit):
spec.build_spec.prefix,
spec.prefix,
)
relocate.unsafe_relocate_text_bin(binaries=bins_to_relocate, prefixes=prefix_to_prefix)
relocate.relocate_text_bin(binaries=bins_to_relocate, prefixes=prefix_to_prefix)
# Copy package into place, except for spec.json (because spec.json
# describes the old spec and not the new spliced spec).
shutil.copytree(


@@ -36,7 +36,7 @@
needs_binary_relocation,
needs_text_relocation,
relocate_links,
unsafe_relocate_text,
relocate_text,
)
from spack.spec import Spec
@@ -190,7 +190,7 @@ def test_buildcache(mock_archive, tmpdir):
@pytest.mark.usefixtures("install_mockery")
def test_unsafe_relocate_text(tmpdir):
def test_relocate_text(tmpdir):
spec = Spec("trivial-install-test-package")
spec.concretize()
with tmpdir.as_cwd():
@@ -203,7 +203,7 @@ def test_unsafe_relocate_text(tmpdir):
filenames = [filename]
new_dir = "/opt/rh/devtoolset/"
# Singleton dict doesn't matter if Ordered
unsafe_relocate_text(filenames, {old_dir: new_dir})
relocate_text(filenames, {old_dir: new_dir})
with open(filename, "r") as script:
for line in script:
assert new_dir in line


@@ -2,13 +2,11 @@
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import io
import os
import os.path
import re
import shutil
import sys
from collections import OrderedDict
import pytest
@@ -18,11 +16,11 @@
import spack.paths
import spack.platforms
import spack.relocate
import spack.relocate_text as relocate_text
import spack.spec
import spack.store
import spack.tengine
import spack.util.executable
from spack.relocate import utf8_path_to_binary_regex, utf8_paths_to_single_binary_regex
pytestmark = pytest.mark.skipif(sys.platform == "win32", reason="Tests fail on Windows")
@@ -269,7 +267,7 @@ def test_set_elf_rpaths_warning(mock_patchelf):
@pytest.mark.requires_executables("patchelf", "strings", "file", "gcc")
@skip_unless_linux
def test_replace_prefix_bin(binary_with_rpaths, prefix_like):
def test_relocate_text_bin(binary_with_rpaths, prefix_like):
prefix = "/usr/" + prefix_like
prefix_bytes = prefix.encode("utf-8")
new_prefix = "/foo/" + prefix_like
@@ -278,7 +276,7 @@ def test_replace_prefix_bin(binary_with_rpaths, prefix_like):
executable = binary_with_rpaths(rpaths=[prefix + "/lib", prefix + "/lib64"])
# Relocate the RPATHs
spack.relocate._replace_prefix_bin(str(executable), {prefix_bytes: new_prefix_bytes})
spack.relocate.relocate_text_bin([str(executable)], {prefix_bytes: new_prefix_bytes})
# Some compilers add rpaths so ensure changes included in final result
assert "%s/lib:%s/lib64" % (new_prefix, new_prefix) in rpaths_for(executable)
@@ -349,7 +347,7 @@ def test_make_elf_binaries_relative(binary_with_rpaths, copy_binary, prefix_tmpdir):
@pytest.mark.requires_executables("patchelf", "strings", "file", "gcc")
@skip_unless_linux
def test_relocate_text_bin(binary_with_rpaths, copy_binary, prefix_tmpdir):
def test_relocate_text_bin_with_message(binary_with_rpaths, copy_binary, prefix_tmpdir):
orig_binary = binary_with_rpaths(
rpaths=[
str(prefix_tmpdir.mkdir("lib")),
@@ -368,7 +366,7 @@ def test_relocate_text_bin(binary_with_rpaths, copy_binary, prefix_tmpdir):
orig_path_bytes = str(orig_binary.dirpath()).encode("utf-8")
new_path_bytes = str(new_binary.dirpath()).encode("utf-8")
spack.relocate.unsafe_relocate_text_bin([str(new_binary)], {orig_path_bytes: new_path_bytes})
spack.relocate.relocate_text_bin([str(new_binary)], {orig_path_bytes: new_path_bytes})
# Check original directory is not there anymore and it was
# substituted with the new one
@@ -382,8 +380,8 @@ def test_relocate_text_bin_raise_if_new_prefix_is_longer(tmpdir):
fpath = str(tmpdir.join("fakebin"))
with open(fpath, "w") as f:
f.write("/short")
with pytest.raises(spack.relocate.BinaryTextReplaceError):
spack.relocate.unsafe_relocate_text_bin([fpath], {short_prefix: long_prefix})
with pytest.raises(relocate_text.BinaryTextReplaceError):
spack.relocate.relocate_text_bin([fpath], {short_prefix: long_prefix})
@pytest.mark.requires_executables("install_name_tool", "file", "cc")
@@ -438,227 +436,3 @@ def test_fixup_macos_rpaths(make_dylib, make_object_file):
# (this is a corner case for GCC installation)
(root, filename) = make_object_file()
assert not fixup_rpath(root, filename)
def test_text_relocation_regex_is_safe():
# Test whether prefix regex is properly escaped
string = b"This does not match /a/, but this does: /[a-z]/."
assert utf8_path_to_binary_regex("/[a-z]/").search(string).group(0) == b"/[a-z]/"
def test_utf8_paths_to_single_binary_regex():
regex = utf8_paths_to_single_binary_regex(["/first/path", "/second/path", "/safe/[a-z]"])
# Match nothing
assert not regex.search(b"text /neither/first/path text /the/second/path text")
# Match first
string = b"contains both /first/path/subdir and /second/path/sub"
assert regex.search(string).group(0) == b"/first/path/subdir"
# Match second
string = b"contains both /not/first/path/subdir but /second/path/subdir"
assert regex.search(string).group(0) == b"/second/path/subdir"
# Match "unsafe" dir name
string = b"don't match /safe/a/path but do match /safe/[a-z]/file"
assert regex.search(string).group(0) == b"/safe/[a-z]/file"
def test_ordered_replacement():
# This tests whether binary text replacement respects order, so that
# a long package prefix is replaced before a shorter sub-prefix like
# the root of the spack store (as a fallback).
def replace_and_expect(prefix_map, before, after=None, suffix_safety_size=7):
f = io.BytesIO(before)
spack.relocate.apply_binary_replacements(f, OrderedDict(prefix_map), suffix_safety_size)
f.seek(0)
assert f.read() == after
# The case of having a non-null terminated common suffix.
replace_and_expect(
[
(b"/old-spack/opt/specific-package", b"/first/specific-package"),
(b"/old-spack/opt", b"/sec/spack/opt"),
],
b"Binary with /old-spack/opt/specific-package and /old-spack/opt",
b"Binary with /////////first/specific-package and /sec/spack/opt",
suffix_safety_size=7,
)
# The case of having a direct null terminated common suffix.
replace_and_expect(
[
(b"/old-spack/opt/specific-package", b"/first/specific-package"),
(b"/old-spack/opt", b"/sec/spack/opt"),
],
b"Binary with /old-spack/opt/specific-package\0 and /old-spack/opt\0",
b"Binary with /////////first/specific-package\0 and /sec/spack/opt\0",
suffix_safety_size=7,
)
# Testing the order of operations (not null terminated, long enough common suffix)
replace_and_expect(
[
(b"/old-spack/opt", b"/s/spack/opt"),
(b"/old-spack/opt/specific-package", b"/first/specific-package"),
],
b"Binary with /old-spack/opt/specific-package and /old-spack/opt",
b"Binary with ///s/spack/opt/specific-package and ///s/spack/opt",
suffix_safety_size=7,
)
# Testing the order of operations (null terminated, long enough common suffix)
replace_and_expect(
[
(b"/old-spack/opt", b"/s/spack/opt"),
(b"/old-spack/opt/specific-package", b"/first/specific-package"),
],
b"Binary with /old-spack/opt/specific-package\0 and /old-spack/opt\0",
b"Binary with ///s/spack/opt/specific-package\0 and ///s/spack/opt\0",
suffix_safety_size=7,
)
# Null terminated within the lookahead window, common suffix long enough
replace_and_expect(
[(b"/old-spack/opt/specific-package", b"/opt/specific-XXXXage")],
b"Binary with /old-spack/opt/specific-package/sub\0 data",
b"Binary with ///////////opt/specific-XXXXage/sub\0 data",
suffix_safety_size=7,
)
# Null terminated within the lookahead window, common suffix too short, but
# shortening is enough to spare more than 7 bytes of old suffix.
replace_and_expect(
[(b"/old-spack/opt/specific-package", b"/opt/specific-XXXXXge")],
b"Binary with /old-spack/opt/specific-package/sub\0 data",
b"Binary with /opt/specific-XXXXXge/sub\0ckage/sub\0 data", # ckage/sub = 9 bytes
suffix_safety_size=7,
)
# Null terminated within the lookahead window, common suffix too short,
# shortening leaves exactly 7 suffix bytes untouched, amazing!
replace_and_expect(
[(b"/old-spack/opt/specific-package", b"/spack/specific-XXXXXge")],
b"Binary with /old-spack/opt/specific-package/sub\0 data",
b"Binary with /spack/specific-XXXXXge/sub\0age/sub\0 data", # age/sub = 7 bytes
suffix_safety_size=7,
)
# Null terminated within the lookahead window, common suffix too short,
# shortening doesn't leave space for 7 bytes, sad!
error_msg = "Cannot replace {!r} with {!r} in the C-string {!r}.".format(
b"/old-spack/opt/specific-package",
b"/snacks/specific-XXXXXge",
b"/old-spack/opt/specific-package/sub",
)
with pytest.raises(spack.relocate.CannotShrinkCString, match=error_msg):
replace_and_expect(
[(b"/old-spack/opt/specific-package", b"/snacks/specific-XXXXXge")],
b"Binary with /old-spack/opt/specific-package/sub\0 data",
# expect failure!
suffix_safety_size=7,
)
# Check that it works when changing suffix_safety_size.
replace_and_expect(
[(b"/old-spack/opt/specific-package", b"/snacks/specific-XXXXXXe")],
b"Binary with /old-spack/opt/specific-package/sub\0 data",
b"Binary with /snacks/specific-XXXXXXe/sub\0ge/sub\0 data",
suffix_safety_size=6,
)
# Finally check the case of no shortening but a long enough common suffix.
replace_and_expect(
[(b"pkg-gwixwaalgczp6", b"pkg-zkesfralgczp6")],
b"Binary with pkg-gwixwaalgczp6/config\0 data",
b"Binary with pkg-zkesfralgczp6/config\0 data",
suffix_safety_size=7,
)
# Too short matching suffix, identical string length
error_msg = "Cannot replace {!r} with {!r} in the C-string {!r}.".format(
b"pkg-gwixwaxlgczp6",
b"pkg-zkesfrzlgczp6",
b"pkg-gwixwaxlgczp6",
)
with pytest.raises(spack.relocate.CannotShrinkCString, match=error_msg):
replace_and_expect(
[(b"pkg-gwixwaxlgczp6", b"pkg-zkesfrzlgczp6")],
b"Binary with pkg-gwixwaxlgczp6\0 data",
# expect failure
suffix_safety_size=7,
)
# Finally, make sure that the regex is not greedily finding the LAST null byte
# it should find the first null byte in the window. In this test we put one null
# at a distance where we can't keep a long enough suffix, and one where we can,
# so we should expect failure when the first null is used.
error_msg = "Cannot replace {!r} with {!r} in the C-string {!r}.".format(
b"pkg-abcdef",
b"pkg-xyzabc",
b"pkg-abcdef",
)
with pytest.raises(spack.relocate.CannotShrinkCString, match=error_msg):
replace_and_expect(
[(b"pkg-abcdef", b"pkg-xyzabc")],
b"Binary with pkg-abcdef\0/xx\0", # def\0/xx is 7 bytes.
# expect failure
suffix_safety_size=7,
)
def test_inplace_text_replacement():
def replace_and_expect(prefix_to_prefix, before: bytes, after: bytes):
f = io.BytesIO(before)
prefix_to_prefix = OrderedDict(prefix_to_prefix)
regex = spack.relocate.byte_strings_to_single_binary_regex(prefix_to_prefix.keys())
spack.relocate._replace_prefix_text_file(f, regex, prefix_to_prefix)
f.seek(0)
assert f.read() == after
replace_and_expect(
[
(b"/first/prefix", b"/first-replacement/prefix"),
(b"/second/prefix", b"/second-replacement/prefix"),
],
b"Example: /first/prefix/subdir and /second/prefix/subdir",
b"Example: /first-replacement/prefix/subdir and /second-replacement/prefix/subdir",
)
replace_and_expect(
[
(b"/replace/in/order", b"/first"),
(b"/replace/in", b"/second"),
(b"/replace", b"/third"),
],
b"/replace/in/order/x /replace/in/y /replace/z",
b"/first/x /second/y /third/z",
)
replace_and_expect(
[
(b"/replace", b"/third"),
(b"/replace/in", b"/second"),
(b"/replace/in/order", b"/first"),
],
b"/replace/in/order/x /replace/in/y /replace/z",
b"/third/in/order/x /third/in/y /third/z",
)
replace_and_expect(
[(b"/my/prefix", b"/replacement")],
b"/dont/replace/my/prefix #!/dont/replace/my/prefix",
b"/dont/replace/my/prefix #!/dont/replace/my/prefix",
)
replace_and_expect(
[(b"/my/prefix", b"/replacement")],
b"Install path: /my/prefix.",
b"Install path: /replacement.",
)
replace_and_expect(
[(b"/my/prefix", b"/replacement")],
b"#!/my/prefix",
b"#!/replacement",
)


@@ -0,0 +1,247 @@
# Copyright 2013-2022 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import io
from collections import OrderedDict
import pytest
import spack.relocate_text as relocate_text
def test_text_relocation_regex_is_safe():
# Test whether prefix regex is properly escaped
string = b"This does not match /a/, but this does: /[a-z]/."
assert relocate_text.utf8_path_to_binary_regex("/[a-z]/").search(string).group(0) == b"/[a-z]/"
def test_utf8_paths_to_single_binary_regex():
regex = relocate_text.utf8_paths_to_single_binary_regex(
["/first/path", "/second/path", "/safe/[a-z]"]
)
# Match nothing
assert not regex.search(b"text /neither/first/path text /the/second/path text")
# Match first
string = b"contains both /first/path/subdir and /second/path/sub"
assert regex.search(string).group(0) == b"/first/path/subdir"
# Match second
string = b"contains both /not/first/path/subdir but /second/path/subdir"
assert regex.search(string).group(0) == b"/second/path/subdir"
# Match "unsafe" dir name
string = b"don't match /safe/a/path but do match /safe/[a-z]/file"
assert regex.search(string).group(0) == b"/safe/[a-z]/file"
def test_ordered_replacement():
# This tests whether binary text replacement respects order, so that
# a long package prefix is replaced before a shorter sub-prefix like
# the root of the spack store (as a fallback).
def replace_and_expect(prefix_map, before, after=None, suffix_safety_size=7):
f = io.BytesIO(before)
relocater = relocate_text.BinaryFilePrefixReplacer(
OrderedDict(prefix_map), suffix_safety_size
)
relocater.apply_to_file(f)
f.seek(0)
assert f.read() == after
# The case of having a non-null terminated common suffix.
replace_and_expect(
[
(b"/old-spack/opt/specific-package", b"/first/specific-package"),
(b"/old-spack/opt", b"/sec/spack/opt"),
],
b"Binary with /old-spack/opt/specific-package and /old-spack/opt",
b"Binary with /////////first/specific-package and /sec/spack/opt",
suffix_safety_size=7,
)
# The case of having a direct null terminated common suffix.
replace_and_expect(
[
(b"/old-spack/opt/specific-package", b"/first/specific-package"),
(b"/old-spack/opt", b"/sec/spack/opt"),
],
b"Binary with /old-spack/opt/specific-package\0 and /old-spack/opt\0",
b"Binary with /////////first/specific-package\0 and /sec/spack/opt\0",
suffix_safety_size=7,
)
# Testing the order of operations (not null terminated, long enough common suffix)
replace_and_expect(
[
(b"/old-spack/opt", b"/s/spack/opt"),
(b"/old-spack/opt/specific-package", b"/first/specific-package"),
],
b"Binary with /old-spack/opt/specific-package and /old-spack/opt",
b"Binary with ///s/spack/opt/specific-package and ///s/spack/opt",
suffix_safety_size=7,
)
# Testing the order of operations (null terminated, long enough common suffix)
replace_and_expect(
[
(b"/old-spack/opt", b"/s/spack/opt"),
(b"/old-spack/opt/specific-package", b"/first/specific-package"),
],
b"Binary with /old-spack/opt/specific-package\0 and /old-spack/opt\0",
b"Binary with ///s/spack/opt/specific-package\0 and ///s/spack/opt\0",
suffix_safety_size=7,
)
# Null terminated within the lookahead window, common suffix long enough
replace_and_expect(
[(b"/old-spack/opt/specific-package", b"/opt/specific-XXXXage")],
b"Binary with /old-spack/opt/specific-package/sub\0 data",
b"Binary with ///////////opt/specific-XXXXage/sub\0 data",
suffix_safety_size=7,
)
# Null terminated within the lookahead window, common suffix too short, but
# shortening is enough to spare more than 7 bytes of old suffix.
replace_and_expect(
[(b"/old-spack/opt/specific-package", b"/opt/specific-XXXXXge")],
b"Binary with /old-spack/opt/specific-package/sub\0 data",
b"Binary with /opt/specific-XXXXXge/sub\0ckage/sub\0 data", # ckage/sub = 9 bytes
suffix_safety_size=7,
)
# Null terminated within the lookahead window, common suffix too short,
# shortening leaves exactly 7 suffix bytes untouched, amazing!
replace_and_expect(
[(b"/old-spack/opt/specific-package", b"/spack/specific-XXXXXge")],
b"Binary with /old-spack/opt/specific-package/sub\0 data",
b"Binary with /spack/specific-XXXXXge/sub\0age/sub\0 data", # age/sub = 7 bytes
suffix_safety_size=7,
)
# Null terminated within the lookahead window, common suffix too short,
# shortening doesn't leave space for 7 bytes, sad!
error_msg = "Cannot replace {!r} with {!r} in the C-string {!r}.".format(
b"/old-spack/opt/specific-package",
b"/snacks/specific-XXXXXge",
b"/old-spack/opt/specific-package/sub",
)
with pytest.raises(relocate_text.CannotShrinkCString, match=error_msg):
replace_and_expect(
[(b"/old-spack/opt/specific-package", b"/snacks/specific-XXXXXge")],
b"Binary with /old-spack/opt/specific-package/sub\0 data",
# expect failure!
suffix_safety_size=7,
)
# Check that it works when changing suffix_safety_size.
replace_and_expect(
[(b"/old-spack/opt/specific-package", b"/snacks/specific-XXXXXXe")],
b"Binary with /old-spack/opt/specific-package/sub\0 data",
b"Binary with /snacks/specific-XXXXXXe/sub\0ge/sub\0 data",
suffix_safety_size=6,
)
# Finally check the case of no shortening but a long enough common suffix.
replace_and_expect(
[(b"pkg-gwixwaalgczp6", b"pkg-zkesfralgczp6")],
b"Binary with pkg-gwixwaalgczp6/config\0 data",
b"Binary with pkg-zkesfralgczp6/config\0 data",
suffix_safety_size=7,
)
# Too short matching suffix, identical string length
error_msg = "Cannot replace {!r} with {!r} in the C-string {!r}.".format(
b"pkg-gwixwaxlgczp6",
b"pkg-zkesfrzlgczp6",
b"pkg-gwixwaxlgczp6",
)
with pytest.raises(relocate_text.CannotShrinkCString, match=error_msg):
replace_and_expect(
[(b"pkg-gwixwaxlgczp6", b"pkg-zkesfrzlgczp6")],
b"Binary with pkg-gwixwaxlgczp6\0 data",
# expect failure
suffix_safety_size=7,
)
# Finally, make sure that the regex is not greedily finding the LAST null byte
# it should find the first null byte in the window. In this test we put one null
# at a distance where we can't keep a long enough suffix, and one where we can,
# so we should expect failure when the first null is used.
error_msg = "Cannot replace {!r} with {!r} in the C-string {!r}.".format(
b"pkg-abcdef",
b"pkg-xyzabc",
b"pkg-abcdef",
)
with pytest.raises(relocate_text.CannotShrinkCString, match=error_msg):
replace_and_expect(
[(b"pkg-abcdef", b"pkg-xyzabc")],
b"Binary with pkg-abcdef\0/xx\0", # def\0/xx is 7 bytes.
# expect failure
suffix_safety_size=7,
)
def test_inplace_text_replacement():
def replace_and_expect(prefix_to_prefix, before: bytes, after: bytes):
f = io.BytesIO(before)
replacer = relocate_text.TextFilePrefixReplacer(OrderedDict(prefix_to_prefix))
replacer.apply_to_file(f)
f.seek(0)
assert f.read() == after
replace_and_expect(
[
(b"/first/prefix", b"/first-replacement/prefix"),
(b"/second/prefix", b"/second-replacement/prefix"),
],
b"Example: /first/prefix/subdir and /second/prefix/subdir",
b"Example: /first-replacement/prefix/subdir and /second-replacement/prefix/subdir",
)
replace_and_expect(
[
(b"/replace/in/order", b"/first"),
(b"/replace/in", b"/second"),
(b"/replace", b"/third"),
],
b"/replace/in/order/x /replace/in/y /replace/z",
b"/first/x /second/y /third/z",
)
replace_and_expect(
[
(b"/replace", b"/third"),
(b"/replace/in", b"/second"),
(b"/replace/in/order", b"/first"),
],
b"/replace/in/order/x /replace/in/y /replace/z",
b"/third/in/order/x /third/in/y /third/z",
)
replace_and_expect(
[(b"/my/prefix", b"/replacement")],
b"/dont/replace/my/prefix #!/dont/replace/my/prefix",
b"/dont/replace/my/prefix #!/dont/replace/my/prefix",
)
replace_and_expect(
[(b"/my/prefix", b"/replacement")],
b"Install path: /my/prefix.",
b"Install path: /replacement.",
)
replace_and_expect(
[(b"/my/prefix", b"/replacement")],
b"#!/my/prefix",
b"#!/replacement",
)
def test_relocate_text_filters_redundant_entries():
# Test that we're filtering identical old / new paths, since that's a waste.
mapping = OrderedDict([("/hello", "/hello"), ("/world", "/world")])
replacer_1 = relocate_text.BinaryFilePrefixReplacer.from_strings_or_bytes(mapping)
replacer_2 = relocate_text.TextFilePrefixReplacer.from_strings_or_bytes(mapping)
assert not replacer_1.prefix_to_prefix
assert not replacer_2.prefix_to_prefix