_replace_prefix_bin performance improvements (#33590)
- single pass over the binary data matching all prefixes - collect offsets and replacement strings - do in-place updates with `fseek` / `fwrite`, since typically our replacement touch O(few bytes) while the file is O(many megabytes) - be nice: leave the file untouched if some string can't be replaced
This commit is contained in:
parent
fb4be98f26
commit
616d5a89d4
2 changed files with 51 additions and 26 deletions
|
@ -467,40 +467,35 @@ def _replace_prefix_bin(filename, byte_prefixes):
|
||||||
"""Replace all the occurrences of the old install prefix with a
|
"""Replace all the occurrences of the old install prefix with a
|
||||||
new install prefix in binary files.
|
new install prefix in binary files.
|
||||||
|
|
||||||
The new install prefix is prefixed with ``os.sep`` until the
|
The new install prefix is prefixed with ``b'/'`` until the
|
||||||
lengths of the prefixes are the same.
|
lengths of the prefixes are the same.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
filename (str): target binary file
|
filename (str): target binary file
|
||||||
byte_prefixes (OrderedDict): OrderedDictionary where the keys are
|
byte_prefixes (OrderedDict): OrderedDictionary where the keys are
|
||||||
precompiled regex of the old prefixes and the values are the new
|
binary strings of the old prefixes and the values are the new
|
||||||
prefixes (uft-8 encoded)
|
binary prefixes
|
||||||
"""
|
"""
|
||||||
|
all_prefixes = re.compile(b"|".join(re.escape(prefix) for prefix in byte_prefixes.keys()))
|
||||||
|
|
||||||
|
def padded_replacement(old):
|
||||||
|
new = byte_prefixes[old]
|
||||||
|
pad = len(old) - len(new)
|
||||||
|
if pad < 0:
|
||||||
|
raise BinaryTextReplaceError(old, new)
|
||||||
|
return new + b"/" * pad
|
||||||
|
|
||||||
with open(filename, "rb+") as f:
|
with open(filename, "rb+") as f:
|
||||||
data = f.read()
|
# Register what replacement string to put on what offsets in the file.
|
||||||
f.seek(0)
|
replacements_at_offset = [
|
||||||
for orig_bytes, new_bytes in byte_prefixes.items():
|
(padded_replacement(m.group(0)), m.start())
|
||||||
original_data_len = len(data)
|
for m in re.finditer(all_prefixes, f.read())
|
||||||
# Skip this hassle if not found
|
]
|
||||||
if orig_bytes not in data:
|
|
||||||
continue
|
# Apply the replacements
|
||||||
# We only care about this problem if we are about to replace
|
for replacement, offset in replacements_at_offset:
|
||||||
length_compatible = len(new_bytes) <= len(orig_bytes)
|
f.seek(offset)
|
||||||
if not length_compatible:
|
f.write(replacement)
|
||||||
tty.debug("Binary failing to relocate is %s" % filename)
|
|
||||||
raise BinaryTextReplaceError(orig_bytes, new_bytes)
|
|
||||||
pad_length = len(orig_bytes) - len(new_bytes)
|
|
||||||
padding = os.sep * pad_length
|
|
||||||
padding = padding.encode("utf-8")
|
|
||||||
data = data.replace(orig_bytes, new_bytes + padding)
|
|
||||||
# Really needs to be the same length
|
|
||||||
if not len(data) == original_data_len:
|
|
||||||
print("Length of pad:", pad_length, "should be", len(padding))
|
|
||||||
print(new_bytes, "was to replace", orig_bytes)
|
|
||||||
raise BinaryStringReplacementError(filename, original_data_len, len(data))
|
|
||||||
f.write(data)
|
|
||||||
f.truncate()
|
|
||||||
|
|
||||||
|
|
||||||
def relocate_macho_binaries(
|
def relocate_macho_binaries(
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
import sys
|
import sys
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
@ -501,3 +502,32 @@ def test_utf8_paths_to_single_binary_regex():
|
||||||
# Match "unsafe" dir name
|
# Match "unsafe" dir name
|
||||||
string = b"don't match /safe/a/path but do match /safe/[a-z]/file"
|
string = b"don't match /safe/a/path but do match /safe/[a-z]/file"
|
||||||
assert regex.search(string).group(0) == b"/safe/[a-z]/file"
|
assert regex.search(string).group(0) == b"/safe/[a-z]/file"
|
||||||
|
|
||||||
|
|
||||||
|
def test_ordered_replacement(tmpdir):
|
||||||
|
# This tests whether binary text replacement respects order, so that
|
||||||
|
# a long package prefix is replaced before a shorter sub-prefix like
|
||||||
|
# the root of the spack store (as a fallback).
|
||||||
|
def replace_and_expect(prefix_map, before, after):
|
||||||
|
file = str(tmpdir.join("file"))
|
||||||
|
with open(file, "wb") as f:
|
||||||
|
f.write(before)
|
||||||
|
spack.relocate._replace_prefix_bin(file, prefix_map)
|
||||||
|
with open(file, "rb") as f:
|
||||||
|
assert f.read() == after
|
||||||
|
|
||||||
|
replace_and_expect(
|
||||||
|
OrderedDict(
|
||||||
|
[(b"/old-spack/opt/specific-package", b"/first"), (b"/old-spack/opt", b"/second")]
|
||||||
|
),
|
||||||
|
b"Binary with /old-spack/opt/specific-package and /old-spack/opt",
|
||||||
|
b"Binary with /first///////////////////////// and /second///////",
|
||||||
|
)
|
||||||
|
|
||||||
|
replace_and_expect(
|
||||||
|
OrderedDict(
|
||||||
|
[(b"/old-spack/opt", b"/second"), (b"/old-spack/opt/specific-package", b"/first")]
|
||||||
|
),
|
||||||
|
b"Binary with /old-spack/opt/specific-package and /old-spack/opt",
|
||||||
|
b"Binary with /second////////specific-package and /second///////",
|
||||||
|
)
|
||||||
|
|
Loading…
Reference in a new issue