_replace_prefix_bin performance improvements (#33590)
- single pass over the binary data matching all prefixes - collect offsets and replacement strings - do in-place updates with `fseek` / `fwrite`, since typically our replacement touch O(few bytes) while the file is O(many megabytes) - be nice: leave the file untouched if some string can't be replaced
This commit is contained in:
parent
fb4be98f26
commit
616d5a89d4
2 changed files with 51 additions and 26 deletions
|
@ -467,40 +467,35 @@ def _replace_prefix_bin(filename, byte_prefixes):
|
|||
"""Replace all the occurrences of the old install prefix with a
|
||||
new install prefix in binary files.
|
||||
|
||||
The new install prefix is prefixed with ``os.sep`` until the
|
||||
The new install prefix is prefixed with ``b'/'`` until the
|
||||
lengths of the prefixes are the same.
|
||||
|
||||
Args:
|
||||
filename (str): target binary file
|
||||
byte_prefixes (OrderedDict): OrderedDictionary where the keys are
|
||||
precompiled regex of the old prefixes and the values are the new
|
||||
prefixes (uft-8 encoded)
|
||||
binary strings of the old prefixes and the values are the new
|
||||
binary prefixes
|
||||
"""
|
||||
all_prefixes = re.compile(b"|".join(re.escape(prefix) for prefix in byte_prefixes.keys()))
|
||||
|
||||
def padded_replacement(old):
|
||||
new = byte_prefixes[old]
|
||||
pad = len(old) - len(new)
|
||||
if pad < 0:
|
||||
raise BinaryTextReplaceError(old, new)
|
||||
return new + b"/" * pad
|
||||
|
||||
with open(filename, "rb+") as f:
|
||||
data = f.read()
|
||||
f.seek(0)
|
||||
for orig_bytes, new_bytes in byte_prefixes.items():
|
||||
original_data_len = len(data)
|
||||
# Skip this hassle if not found
|
||||
if orig_bytes not in data:
|
||||
continue
|
||||
# We only care about this problem if we are about to replace
|
||||
length_compatible = len(new_bytes) <= len(orig_bytes)
|
||||
if not length_compatible:
|
||||
tty.debug("Binary failing to relocate is %s" % filename)
|
||||
raise BinaryTextReplaceError(orig_bytes, new_bytes)
|
||||
pad_length = len(orig_bytes) - len(new_bytes)
|
||||
padding = os.sep * pad_length
|
||||
padding = padding.encode("utf-8")
|
||||
data = data.replace(orig_bytes, new_bytes + padding)
|
||||
# Really needs to be the same length
|
||||
if not len(data) == original_data_len:
|
||||
print("Length of pad:", pad_length, "should be", len(padding))
|
||||
print(new_bytes, "was to replace", orig_bytes)
|
||||
raise BinaryStringReplacementError(filename, original_data_len, len(data))
|
||||
f.write(data)
|
||||
f.truncate()
|
||||
# Register what replacement string to put on what offsets in the file.
|
||||
replacements_at_offset = [
|
||||
(padded_replacement(m.group(0)), m.start())
|
||||
for m in re.finditer(all_prefixes, f.read())
|
||||
]
|
||||
|
||||
# Apply the replacements
|
||||
for replacement, offset in replacements_at_offset:
|
||||
f.seek(offset)
|
||||
f.write(replacement)
|
||||
|
||||
|
||||
def relocate_macho_binaries(
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
import re
|
||||
import shutil
|
||||
import sys
|
||||
from collections import OrderedDict
|
||||
|
||||
import pytest
|
||||
|
||||
|
@ -501,3 +502,32 @@ def test_utf8_paths_to_single_binary_regex():
|
|||
# Match "unsafe" dir name
|
||||
string = b"don't match /safe/a/path but do match /safe/[a-z]/file"
|
||||
assert regex.search(string).group(0) == b"/safe/[a-z]/file"
|
||||
|
||||
|
||||
def test_ordered_replacement(tmpdir):
|
||||
# This tests whether binary text replacement respects order, so that
|
||||
# a long package prefix is replaced before a shorter sub-prefix like
|
||||
# the root of the spack store (as a fallback).
|
||||
def replace_and_expect(prefix_map, before, after):
|
||||
file = str(tmpdir.join("file"))
|
||||
with open(file, "wb") as f:
|
||||
f.write(before)
|
||||
spack.relocate._replace_prefix_bin(file, prefix_map)
|
||||
with open(file, "rb") as f:
|
||||
assert f.read() == after
|
||||
|
||||
replace_and_expect(
|
||||
OrderedDict(
|
||||
[(b"/old-spack/opt/specific-package", b"/first"), (b"/old-spack/opt", b"/second")]
|
||||
),
|
||||
b"Binary with /old-spack/opt/specific-package and /old-spack/opt",
|
||||
b"Binary with /first///////////////////////// and /second///////",
|
||||
)
|
||||
|
||||
replace_and_expect(
|
||||
OrderedDict(
|
||||
[(b"/old-spack/opt", b"/second"), (b"/old-spack/opt/specific-package", b"/first")]
|
||||
),
|
||||
b"Binary with /old-spack/opt/specific-package and /old-spack/opt",
|
||||
b"Binary with /second////////specific-package and /second///////",
|
||||
)
|
||||
|
|
Loading…
Reference in a new issue