elf: relocate PT_INTERP (#42318)
Relocation of `PT_INTERP` in ELF files already happens to work from long to short path, thanks to generic binary relocation (i.e. find and replace). This PR improves it: 1. Adds logic to grow `PT_INTERP` strings through patchelf (which is only useful if the interpreter and rpath paths are the _only_ paths in the binary that need to be relocated) 2. Makes shrinking `PT_INTERP` cleaner. Before this PR when you would use Spack-built glibc as link dep, and relocate executables using its dynamic linker, you'd end up with ``` $ file exe exe: ELF 64-bit LSD pie executable, ..., interpreter /////////////////////////////////////////////////path/to/glibc/lib/ld-linux.so ``` With this PR you get something sensible: ``` $ file exe exe: ELF 64-bit LSD pie executable, ..., interpreter /path/to/glibc/lib/ld-linux.so ``` When Spack cannot modify the interpreter or rpath strings in-place, it errors out without modifying the file, and leaves both tasks to patchelf instead. Also add type hints to `elf.py`.
This commit is contained in:
parent
6d55caabe8
commit
28eea2994f
7 changed files with 306 additions and 208 deletions
|
@ -542,7 +542,7 @@ def verify_patchelf(patchelf: "spack.util.executable.Executable") -> bool:
|
|||
return version >= spack.version.Version("0.13.1")
|
||||
|
||||
|
||||
def ensure_patchelf_in_path_or_raise() -> None:
|
||||
def ensure_patchelf_in_path_or_raise() -> spack.util.executable.Executable:
|
||||
"""Ensure patchelf is in the PATH or raise."""
|
||||
# The old concretizer is not smart and we're doing its job: if the latest patchelf
|
||||
# does not concretize because the compiler doesn't support C++17, we try to
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
|
||||
|
||||
import os
|
||||
from typing import IO, Optional, Tuple
|
||||
from typing import BinaryIO, Optional, Tuple
|
||||
|
||||
import llnl.util.tty as tty
|
||||
from llnl.util.filesystem import BaseDirectoryVisitor, visit_directory_tree
|
||||
|
@ -18,7 +18,7 @@ def should_keep(path: bytes) -> bool:
|
|||
return path.startswith(b"$") or (os.path.isabs(path) and os.path.lexists(path))
|
||||
|
||||
|
||||
def _drop_redundant_rpaths(f: IO) -> Optional[Tuple[bytes, bytes]]:
|
||||
def _drop_redundant_rpaths(f: BinaryIO) -> Optional[Tuple[bytes, bytes]]:
|
||||
"""Drop redundant entries from rpath.
|
||||
|
||||
Args:
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
import os
|
||||
import re
|
||||
from collections import OrderedDict
|
||||
from typing import List, Optional
|
||||
|
||||
import macholib.mach_o
|
||||
import macholib.MachO
|
||||
|
@ -47,7 +48,7 @@ def __init__(self, file_path, root_path):
|
|||
|
||||
|
||||
@memoized
|
||||
def _patchelf():
|
||||
def _patchelf() -> Optional[executable.Executable]:
|
||||
"""Return the full path to the patchelf binary, if available, else None."""
|
||||
import spack.bootstrap
|
||||
|
||||
|
@ -55,9 +56,7 @@ def _patchelf():
|
|||
return None
|
||||
|
||||
with spack.bootstrap.ensure_bootstrap_configuration():
|
||||
patchelf = spack.bootstrap.ensure_patchelf_in_path_or_raise()
|
||||
|
||||
return patchelf.path
|
||||
return spack.bootstrap.ensure_patchelf_in_path_or_raise()
|
||||
|
||||
|
||||
def _elf_rpaths_for(path):
|
||||
|
@ -340,31 +339,34 @@ def macholib_get_paths(cur_path):
|
|||
return (rpaths, deps, ident)
|
||||
|
||||
|
||||
def _set_elf_rpaths(target, rpaths):
|
||||
"""Replace the original RPATH of the target with the paths passed
|
||||
as arguments.
|
||||
def _set_elf_rpaths_and_interpreter(
|
||||
target: str, rpaths: List[str], interpreter: Optional[str] = None
|
||||
) -> Optional[str]:
|
||||
"""Replace the original RPATH of the target with the paths passed as arguments.
|
||||
|
||||
Args:
|
||||
target: target executable. Must be an ELF object.
|
||||
rpaths: paths to be set in the RPATH
|
||||
interpreter: optionally set the interpreter
|
||||
|
||||
Returns:
|
||||
A string concatenating the stdout and stderr of the call
|
||||
to ``patchelf`` if it was invoked
|
||||
A string concatenating the stdout and stderr of the call to ``patchelf`` if it was invoked
|
||||
"""
|
||||
# Join the paths using ':' as a separator
|
||||
rpaths_str = ":".join(rpaths)
|
||||
|
||||
patchelf, output = executable.Executable(_patchelf()), None
|
||||
try:
|
||||
# TODO: error handling is not great here?
|
||||
# TODO: revisit the use of --force-rpath as it might be conditional
|
||||
# TODO: if we want to support setting RUNPATH from binary packages
|
||||
patchelf_args = ["--force-rpath", "--set-rpath", rpaths_str, target]
|
||||
output = patchelf(*patchelf_args, output=str, error=str)
|
||||
args = ["--force-rpath", "--set-rpath", rpaths_str]
|
||||
if interpreter:
|
||||
args.extend(["--set-interpreter", interpreter])
|
||||
args.append(target)
|
||||
return _patchelf()(*args, output=str, error=str)
|
||||
except executable.ProcessError as e:
|
||||
msg = "patchelf --force-rpath --set-rpath {0} failed with error {1}"
|
||||
tty.warn(msg.format(target, e))
|
||||
return output
|
||||
tty.warn(str(e))
|
||||
return None
|
||||
|
||||
|
||||
def needs_binary_relocation(m_type, m_subtype):
|
||||
|
@ -501,10 +503,12 @@ def new_relocate_elf_binaries(binaries, prefix_to_prefix):
|
|||
|
||||
for path in binaries:
|
||||
try:
|
||||
elf.replace_rpath_in_place_or_raise(path, prefix_to_prefix)
|
||||
except elf.ElfDynamicSectionUpdateFailed as e:
|
||||
# Fall back to the old `patchelf --set-rpath` method.
|
||||
_set_elf_rpaths(path, e.new.decode("utf-8").split(":"))
|
||||
elf.substitute_rpath_and_pt_interp_in_place_or_raise(path, prefix_to_prefix)
|
||||
except elf.ElfCStringUpdatesFailed as e:
|
||||
# Fall back to `patchelf --set-rpath ... --set-interpreter ...`
|
||||
rpaths = e.rpath.new_value.decode("utf-8").split(":") if e.rpath else []
|
||||
interpreter = e.pt_interp.new_value.decode("utf-8") if e.pt_interp else None
|
||||
_set_elf_rpaths_and_interpreter(path, rpaths=rpaths, interpreter=interpreter)
|
||||
|
||||
|
||||
def relocate_elf_binaries(
|
||||
|
@ -546,10 +550,10 @@ def relocate_elf_binaries(
|
|||
new_rpaths = _make_relative(new_binary, new_root, new_norm_rpaths)
|
||||
# check to see if relative rpaths are changed before rewriting
|
||||
if sorted(new_rpaths) != sorted(orig_rpaths):
|
||||
_set_elf_rpaths(new_binary, new_rpaths)
|
||||
_set_elf_rpaths_and_interpreter(new_binary, new_rpaths)
|
||||
else:
|
||||
new_rpaths = _transform_rpaths(orig_rpaths, orig_root, new_prefixes)
|
||||
_set_elf_rpaths(new_binary, new_rpaths)
|
||||
_set_elf_rpaths_and_interpreter(new_binary, new_rpaths)
|
||||
|
||||
|
||||
def make_link_relative(new_links, orig_links):
|
||||
|
@ -596,7 +600,7 @@ def make_elf_binaries_relative(new_binaries, orig_binaries, orig_layout_root):
|
|||
orig_rpaths = _elf_rpaths_for(new_binary)
|
||||
if orig_rpaths:
|
||||
new_rpaths = _make_relative(orig_binary, orig_layout_root, orig_rpaths)
|
||||
_set_elf_rpaths(new_binary, new_rpaths)
|
||||
_set_elf_rpaths_and_interpreter(new_binary, new_rpaths)
|
||||
|
||||
|
||||
def warn_if_link_cant_be_relocated(link, target):
|
||||
|
|
|
@ -1851,7 +1851,7 @@ def binary_with_rpaths(prefix_tmpdir):
|
|||
paths are encoded with `$ORIGIN` prepended.
|
||||
"""
|
||||
|
||||
def _factory(rpaths, message="Hello world!"):
|
||||
def _factory(rpaths, message="Hello world!", dynamic_linker="/lib64/ld-linux.so.2"):
|
||||
source = prefix_tmpdir.join("main.c")
|
||||
source.write(
|
||||
"""
|
||||
|
@ -1867,10 +1867,10 @@ def _factory(rpaths, message="Hello world!"):
|
|||
executable = source.dirpath("main.x")
|
||||
# Encode relative RPATHs using `$ORIGIN` as the root prefix
|
||||
rpaths = [x if os.path.isabs(x) else os.path.join("$ORIGIN", x) for x in rpaths]
|
||||
rpath_str = ":".join(rpaths)
|
||||
opts = [
|
||||
"-Wl,--disable-new-dtags",
|
||||
"-Wl,-rpath={0}".format(rpath_str),
|
||||
f"-Wl,-rpath={':'.join(rpaths)}",
|
||||
f"-Wl,--dynamic-linker,{dynamic_linker}",
|
||||
str(source),
|
||||
"-o",
|
||||
str(executable),
|
||||
|
|
|
@ -46,14 +46,6 @@ def text_in_bin(text, binary):
|
|||
return True
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def mock_patchelf(tmpdir, mock_executable):
|
||||
def _factory(output):
|
||||
return mock_executable("patchelf", output=output)
|
||||
|
||||
return _factory
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def make_dylib(tmpdir_factory):
|
||||
"""Create a shared library with unfriendly qualities.
|
||||
|
|
|
@ -5,7 +5,6 @@
|
|||
|
||||
|
||||
import io
|
||||
from collections import OrderedDict
|
||||
|
||||
import pytest
|
||||
|
||||
|
@ -137,45 +136,46 @@ def test_only_header():
|
|||
|
||||
@pytest.mark.requires_executables("gcc")
|
||||
@skip_unless_linux
|
||||
def test_elf_get_and_replace_rpaths(binary_with_rpaths):
|
||||
long_rpaths = ["/very/long/prefix-a/x", "/very/long/prefix-b/y"]
|
||||
executable = str(binary_with_rpaths(rpaths=long_rpaths))
|
||||
|
||||
# Before
|
||||
assert elf.get_rpaths(executable) == long_rpaths
|
||||
|
||||
replacements = OrderedDict(
|
||||
[
|
||||
(b"/very/long/prefix-a", b"/short-a"),
|
||||
(b"/very/long/prefix-b", b"/short-b"),
|
||||
(b"/very/long", b"/dont"),
|
||||
]
|
||||
def test_elf_get_and_replace_rpaths_and_pt_interp(binary_with_rpaths):
|
||||
long_paths = ["/very/long/prefix-a/x", "/very/long/prefix-b/y"]
|
||||
executable = str(
|
||||
binary_with_rpaths(rpaths=long_paths, dynamic_linker="/very/long/prefix-b/lib/ld.so")
|
||||
)
|
||||
|
||||
# Before
|
||||
assert elf.get_rpaths(executable) == long_paths
|
||||
|
||||
replacements = {
|
||||
b"/very/long/prefix-a": b"/short-a",
|
||||
b"/very/long/prefix-b": b"/short-b",
|
||||
b"/very/long": b"/dont",
|
||||
}
|
||||
|
||||
# Replace once: should modify the file.
|
||||
assert elf.replace_rpath_in_place_or_raise(executable, replacements)
|
||||
assert elf.substitute_rpath_and_pt_interp_in_place_or_raise(executable, replacements)
|
||||
|
||||
# Replace twice: nothing to be done.
|
||||
assert not elf.replace_rpath_in_place_or_raise(executable, replacements)
|
||||
assert not elf.substitute_rpath_and_pt_interp_in_place_or_raise(executable, replacements)
|
||||
|
||||
# Verify the rpaths were modified correctly
|
||||
assert elf.get_rpaths(executable) == ["/short-a/x", "/short-b/y"]
|
||||
assert elf.get_interpreter(executable) == "/short-b/lib/ld.so"
|
||||
|
||||
# Going back to long rpaths should fail, since we've added trailing \0
|
||||
# bytes, and replacement can't assume it can write back in repeated null
|
||||
# bytes -- it may correspond to zero-length strings for example.
|
||||
with pytest.raises(
|
||||
elf.ElfDynamicSectionUpdateFailed,
|
||||
match="New rpath /very/long/prefix-a/x:/very/long/prefix-b/y is "
|
||||
"longer than old rpath /short-a/x:/short-b/y",
|
||||
):
|
||||
elf.replace_rpath_in_place_or_raise(
|
||||
executable,
|
||||
OrderedDict(
|
||||
[(b"/short-a", b"/very/long/prefix-a"), (b"/short-b", b"/very/long/prefix-b")]
|
||||
),
|
||||
with pytest.raises(elf.ElfCStringUpdatesFailed) as info:
|
||||
elf.substitute_rpath_and_pt_interp_in_place_or_raise(
|
||||
executable, {b"/short-a": b"/very/long/prefix-a", b"/short-b": b"/very/long/prefix-b"}
|
||||
)
|
||||
|
||||
assert info.value.rpath is not None
|
||||
assert info.value.pt_interp is not None
|
||||
assert info.value.rpath.old_value == b"/short-a/x:/short-b/y"
|
||||
assert info.value.rpath.new_value == b"/very/long/prefix-a/x:/very/long/prefix-b/y"
|
||||
assert info.value.pt_interp.old_value == b"/short-b/lib/ld.so"
|
||||
assert info.value.pt_interp.new_value == b"/very/long/prefix-b/lib/ld.so"
|
||||
|
||||
|
||||
@pytest.mark.requires_executables("gcc")
|
||||
@skip_unless_linux
|
||||
|
|
|
@ -6,53 +6,59 @@
|
|||
import bisect
|
||||
import re
|
||||
import struct
|
||||
from collections import namedtuple
|
||||
from struct import calcsize, unpack, unpack_from
|
||||
from typing import BinaryIO, Dict, List, NamedTuple, Optional, Pattern, Tuple
|
||||
|
||||
ElfHeader = namedtuple(
|
||||
"ElfHeader",
|
||||
[
|
||||
"e_type",
|
||||
"e_machine",
|
||||
"e_version",
|
||||
"e_entry",
|
||||
"e_phoff",
|
||||
"e_shoff",
|
||||
"e_flags",
|
||||
"e_ehsize",
|
||||
"e_phentsize",
|
||||
"e_phnum",
|
||||
"e_shentsize",
|
||||
"e_shnum",
|
||||
"e_shstrndx",
|
||||
],
|
||||
)
|
||||
|
||||
SectionHeader = namedtuple(
|
||||
"SectionHeader",
|
||||
[
|
||||
"sh_name",
|
||||
"sh_type",
|
||||
"sh_flags",
|
||||
"sh_addr",
|
||||
"sh_offset",
|
||||
"sh_size",
|
||||
"sh_link",
|
||||
"sh_info",
|
||||
"sh_addralign",
|
||||
"sh_entsize",
|
||||
],
|
||||
)
|
||||
class ElfHeader(NamedTuple):
|
||||
e_type: int
|
||||
e_machine: int
|
||||
e_version: int
|
||||
e_entry: int
|
||||
e_phoff: int
|
||||
e_shoff: int
|
||||
e_flags: int
|
||||
e_ehsize: int
|
||||
e_phentsize: int
|
||||
e_phnum: int
|
||||
e_shentsize: int
|
||||
e_shnum: int
|
||||
e_shstrndx: int
|
||||
|
||||
ProgramHeader32 = namedtuple(
|
||||
"ProgramHeader32",
|
||||
["p_type", "p_offset", "p_vaddr", "p_paddr", "p_filesz", "p_memsz", "p_flags", "p_align"],
|
||||
)
|
||||
|
||||
ProgramHeader64 = namedtuple(
|
||||
"ProgramHeader64",
|
||||
["p_type", "p_flags", "p_offset", "p_vaddr", "p_paddr", "p_filesz", "p_memsz", "p_align"],
|
||||
)
|
||||
class SectionHeader(NamedTuple):
|
||||
sh_name: int
|
||||
sh_type: int
|
||||
sh_flags: int
|
||||
sh_addr: int
|
||||
sh_offset: int
|
||||
sh_size: int
|
||||
sh_link: int
|
||||
sh_info: int
|
||||
sh_addralign: int
|
||||
sh_entsize: int
|
||||
|
||||
|
||||
class ProgramHeader32(NamedTuple):
|
||||
p_type: int
|
||||
p_offset: int
|
||||
p_vaddr: int
|
||||
p_paddr: int
|
||||
p_filesz: int
|
||||
p_memsz: int
|
||||
p_flags: int
|
||||
p_align: int
|
||||
|
||||
|
||||
class ProgramHeader64(NamedTuple):
|
||||
p_type: int
|
||||
p_flags: int
|
||||
p_offset: int
|
||||
p_vaddr: int
|
||||
p_paddr: int
|
||||
p_filesz: int
|
||||
p_memsz: int
|
||||
p_align: int
|
||||
|
||||
|
||||
class ELF_CONSTANTS:
|
||||
|
@ -78,6 +84,31 @@ class ELF_CONSTANTS:
|
|||
class ElfFile:
|
||||
"""Parsed ELF file."""
|
||||
|
||||
is_64_bit: bool
|
||||
is_little_endian: bool
|
||||
byte_order: str
|
||||
elf_hdr: ElfHeader
|
||||
pt_load: List[Tuple[int, int]]
|
||||
has_pt_interp: bool
|
||||
pt_interp_p_offset: int
|
||||
pt_interp_p_filesz: int
|
||||
pt_interp_str: bytes
|
||||
has_pt_dynamic: bool
|
||||
pt_dynamic_p_offset: int
|
||||
pt_dynamic_p_filesz: int
|
||||
pt_dynamic_strtab_offset: int
|
||||
has_rpath: bool
|
||||
dt_rpath_offset: int
|
||||
dt_rpath_str: bytes
|
||||
rpath_strtab_offset: int
|
||||
is_runpath: bool
|
||||
has_needed: bool
|
||||
dt_needed_strtab_offsets: List[int]
|
||||
dt_needed_strs: List[bytes]
|
||||
has_soname: bool
|
||||
dt_soname_strtab_offset: int
|
||||
dt_soname_str: bytes
|
||||
|
||||
__slots__ = [
|
||||
"is_64_bit",
|
||||
"is_little_endian",
|
||||
|
@ -120,13 +151,13 @@ def __init__(self):
|
|||
self.has_pt_interp = False
|
||||
|
||||
|
||||
def parse_c_string(byte_string, start=0):
|
||||
def parse_c_string(byte_string: bytes, start: int = 0) -> bytes:
|
||||
"""
|
||||
Retrieve a C-string at a given offset in a byte string
|
||||
|
||||
Arguments:
|
||||
byte_string (bytes): String
|
||||
start (int): Offset into the string
|
||||
byte_string: String
|
||||
start: Offset into the string
|
||||
|
||||
Returns:
|
||||
bytes: A copy of the C-string excluding the terminating null byte
|
||||
|
@ -137,15 +168,15 @@ def parse_c_string(byte_string, start=0):
|
|||
return byte_string[start:str_end]
|
||||
|
||||
|
||||
def read_exactly(f, num_bytes, msg):
|
||||
def read_exactly(f: BinaryIO, num_bytes: int, msg: str) -> bytes:
|
||||
"""
|
||||
Read exactly num_bytes at the current offset, otherwise raise
|
||||
a parsing error with the given error message.
|
||||
|
||||
Arguments:
|
||||
f: file handle
|
||||
num_bytes (int): Number of bytes to read
|
||||
msg (str): Error to show when bytes cannot be read
|
||||
num_bytes: Number of bytes to read
|
||||
msg: Error to show when bytes cannot be read
|
||||
|
||||
Returns:
|
||||
bytes: the ``num_bytes`` bytes that were read.
|
||||
|
@ -156,19 +187,18 @@ def read_exactly(f, num_bytes, msg):
|
|||
return data
|
||||
|
||||
|
||||
def parse_program_headers(f, elf):
|
||||
def parse_program_headers(f: BinaryIO, elf: ElfFile) -> None:
|
||||
"""
|
||||
Parse program headers
|
||||
|
||||
Arguments:
|
||||
f: file handle
|
||||
elf (ElfFile): ELF file parser data
|
||||
elf: ELF file parser data
|
||||
"""
|
||||
# Forward to the program header
|
||||
f.seek(elf.elf_hdr.e_phoff)
|
||||
|
||||
# Here we have to make a mapping from virtual address to offset in the file.
|
||||
ProgramHeader = ProgramHeader64 if elf.is_64_bit else ProgramHeader32
|
||||
ph_fmt = elf.byte_order + ("LLQQQQQQ" if elf.is_64_bit else "LLLLLLLL")
|
||||
ph_size = calcsize(ph_fmt)
|
||||
ph_num = elf.elf_hdr.e_phnum
|
||||
|
@ -176,28 +206,31 @@ def parse_program_headers(f, elf):
|
|||
# Read all program headers in one go
|
||||
data = read_exactly(f, ph_num * ph_size, "Malformed program header")
|
||||
|
||||
ProgramHeader = ProgramHeader64 if elf.is_64_bit else ProgramHeader32
|
||||
|
||||
for i in range(ph_num):
|
||||
ph = ProgramHeader._make(unpack_from(ph_fmt, data, i * ph_size))
|
||||
# mypy currently does not understand the union of two named tuples with equal fields
|
||||
ph = ProgramHeader(*unpack_from(ph_fmt, data, i * ph_size))
|
||||
|
||||
# Skip segments of size 0; we don't distinguish between missing segment and
|
||||
# empty segments. I've see an empty PT_DYNAMIC section for an ELF file that
|
||||
# contained debug data.
|
||||
if ph.p_filesz == 0:
|
||||
if ph.p_filesz == 0: # type: ignore
|
||||
continue
|
||||
|
||||
# For PT_LOAD entries: Save offsets and virtual addrs of the loaded ELF segments
|
||||
# This way we can map offsets by virtual address to offsets in the file.
|
||||
if ph.p_type == ELF_CONSTANTS.PT_LOAD:
|
||||
elf.pt_load.append((ph.p_offset, ph.p_vaddr))
|
||||
if ph.p_type == ELF_CONSTANTS.PT_LOAD: # type: ignore
|
||||
elf.pt_load.append((ph.p_offset, ph.p_vaddr)) # type: ignore
|
||||
|
||||
elif ph.p_type == ELF_CONSTANTS.PT_INTERP:
|
||||
elf.pt_interp_p_offset = ph.p_offset
|
||||
elf.pt_interp_p_filesz = ph.p_filesz
|
||||
elif ph.p_type == ELF_CONSTANTS.PT_INTERP: # type: ignore
|
||||
elf.pt_interp_p_offset = ph.p_offset # type: ignore
|
||||
elf.pt_interp_p_filesz = ph.p_filesz # type: ignore
|
||||
elf.has_pt_interp = True
|
||||
|
||||
elif ph.p_type == ELF_CONSTANTS.PT_DYNAMIC:
|
||||
elf.pt_dynamic_p_offset = ph.p_offset
|
||||
elf.pt_dynamic_p_filesz = ph.p_filesz
|
||||
elif ph.p_type == ELF_CONSTANTS.PT_DYNAMIC: # type: ignore
|
||||
elf.pt_dynamic_p_offset = ph.p_offset # type: ignore
|
||||
elf.pt_dynamic_p_filesz = ph.p_filesz # type: ignore
|
||||
elf.has_pt_dynamic = True
|
||||
|
||||
# The linker sorts PT_LOAD segments by vaddr, but let's do it just to be sure, since
|
||||
|
@ -205,27 +238,27 @@ def parse_program_headers(f, elf):
|
|||
elf.pt_load.sort(key=lambda x: x[1])
|
||||
|
||||
|
||||
def parse_pt_interp(f, elf):
|
||||
def parse_pt_interp(f: BinaryIO, elf: ElfFile) -> None:
|
||||
"""
|
||||
Parse the interpreter (i.e. absolute path to the dynamic linker)
|
||||
|
||||
Arguments:
|
||||
f: file handle
|
||||
elf (ElfFile): ELF file parser data
|
||||
elf: ELF file parser data
|
||||
"""
|
||||
f.seek(elf.pt_interp_p_offset)
|
||||
data = read_exactly(f, elf.pt_interp_p_filesz, "Malformed PT_INTERP entry")
|
||||
elf.pt_interp_str = parse_c_string(data)
|
||||
|
||||
|
||||
def find_strtab_size_at_offset(f, elf, offset):
|
||||
def find_strtab_size_at_offset(f: BinaryIO, elf: ElfFile, offset: int) -> int:
|
||||
"""
|
||||
Retrieve the size of a string table section at a particular known offset
|
||||
|
||||
Arguments:
|
||||
f: file handle
|
||||
elf (ElfFile): ELF file parser data
|
||||
offset (int): offset of the section in the file (i.e. ``sh_offset``)
|
||||
elf: ELF file parser data
|
||||
offset: offset of the section in the file (i.e. ``sh_offset``)
|
||||
|
||||
Returns:
|
||||
int: the size of the string table in bytes
|
||||
|
@ -235,50 +268,49 @@ def find_strtab_size_at_offset(f, elf, offset):
|
|||
f.seek(elf.elf_hdr.e_shoff)
|
||||
for _ in range(elf.elf_hdr.e_shnum):
|
||||
data = read_exactly(f, section_hdr_size, "Malformed section header")
|
||||
sh = SectionHeader._make(unpack(section_hdr_fmt, data))
|
||||
sh = SectionHeader(*unpack(section_hdr_fmt, data))
|
||||
if sh.sh_type == ELF_CONSTANTS.SHT_STRTAB and sh.sh_offset == offset:
|
||||
return sh.sh_size
|
||||
|
||||
raise ElfParsingError("Could not determine strtab size")
|
||||
|
||||
|
||||
def retrieve_strtab(f, elf, offset):
|
||||
def retrieve_strtab(f: BinaryIO, elf: ElfFile, offset: int) -> bytes:
|
||||
"""
|
||||
Read a full string table at the given offset, which
|
||||
requires looking it up in the section headers.
|
||||
|
||||
Arguments:
|
||||
elf (ElfFile): ELF file parser data
|
||||
vaddr (int): virtual address
|
||||
elf: ELF file parser data
|
||||
vaddr: virtual address
|
||||
|
||||
Returns:
|
||||
bytes: file offset
|
||||
Returns: file offset
|
||||
"""
|
||||
size = find_strtab_size_at_offset(f, elf, offset)
|
||||
f.seek(offset)
|
||||
return read_exactly(f, size, "Could not read string table")
|
||||
|
||||
|
||||
def vaddr_to_offset(elf, vaddr):
|
||||
def vaddr_to_offset(elf: ElfFile, vaddr: int) -> int:
|
||||
"""
|
||||
Given a virtual address, find the corresponding offset in the ELF file itself.
|
||||
|
||||
Arguments:
|
||||
elf (ElfFile): ELF file parser data
|
||||
vaddr (int): virtual address
|
||||
elf: ELF file parser data
|
||||
vaddr: virtual address
|
||||
"""
|
||||
idx = bisect.bisect_right([p_vaddr for (p_offset, p_vaddr) in elf.pt_load], vaddr) - 1
|
||||
p_offset, p_vaddr = elf.pt_load[idx]
|
||||
return p_offset - p_vaddr + vaddr
|
||||
|
||||
|
||||
def parse_pt_dynamic(f, elf):
|
||||
def parse_pt_dynamic(f: BinaryIO, elf: ElfFile) -> None:
|
||||
"""
|
||||
Parse the dynamic section of an ELF file
|
||||
|
||||
Arguments:
|
||||
f: file handle
|
||||
elf (ElfFile): ELF file parse data
|
||||
elf: ELF file parse data
|
||||
"""
|
||||
dynamic_array_fmt = elf.byte_order + ("qQ" if elf.is_64_bit else "lL")
|
||||
dynamic_array_size = calcsize(dynamic_array_fmt)
|
||||
|
@ -347,7 +379,7 @@ def parse_pt_dynamic(f, elf):
|
|||
elf.dt_rpath_str = parse_c_string(string_table, elf.rpath_strtab_offset)
|
||||
|
||||
|
||||
def parse_header(f, elf):
|
||||
def parse_header(f: BinaryIO, elf: ElfFile) -> None:
|
||||
# Read the 32/64 bit class independent part of the header and validate
|
||||
e_ident = f.read(16)
|
||||
|
||||
|
@ -374,10 +406,12 @@ def parse_header(f, elf):
|
|||
elf_header_fmt = elf.byte_order + ("HHLQQQLHHHHHH" if elf.is_64_bit else "HHLLLLLHHHHHH")
|
||||
hdr_size = calcsize(elf_header_fmt)
|
||||
data = read_exactly(f, hdr_size, "ELF header malformed")
|
||||
elf.elf_hdr = ElfHeader._make(unpack(elf_header_fmt, data))
|
||||
elf.elf_hdr = ElfHeader(*unpack(elf_header_fmt, data))
|
||||
|
||||
|
||||
def _do_parse_elf(f, interpreter=True, dynamic_section=True, only_header=False):
|
||||
def _do_parse_elf(
|
||||
f: BinaryIO, interpreter: bool = True, dynamic_section: bool = True, only_header: bool = False
|
||||
) -> ElfFile:
|
||||
# We don't (yet?) allow parsing ELF files at a nonzero offset, we just
|
||||
# jump to absolute offsets as they are specified in the ELF file.
|
||||
if f.tell() != 0:
|
||||
|
@ -406,7 +440,12 @@ def _do_parse_elf(f, interpreter=True, dynamic_section=True, only_header=False):
|
|||
return elf
|
||||
|
||||
|
||||
def parse_elf(f, interpreter=False, dynamic_section=False, only_header=False):
|
||||
def parse_elf(
|
||||
f: BinaryIO,
|
||||
interpreter: bool = False,
|
||||
dynamic_section: bool = False,
|
||||
only_header: bool = False,
|
||||
) -> ElfFile:
|
||||
"""Given a file handle f for an ELF file opened in binary mode, return an ElfFile
|
||||
object that is stores data about rpaths"""
|
||||
try:
|
||||
|
@ -417,28 +456,30 @@ def parse_elf(f, interpreter=False, dynamic_section=False, only_header=False):
|
|||
raise ElfParsingError("Malformed ELF file")
|
||||
|
||||
|
||||
def get_rpaths(path):
|
||||
"""Returns list of rpaths of the given file as UTF-8 strings, or None if the file
|
||||
does not have any rpaths."""
|
||||
def get_rpaths(path: str) -> Optional[List[str]]:
|
||||
"""Returns list of rpaths of the given file as UTF-8 strings, or None if not set."""
|
||||
try:
|
||||
with open(path, "rb") as f:
|
||||
elf = parse_elf(f, interpreter=False, dynamic_section=True)
|
||||
return elf.dt_rpath_str.decode("utf-8").split(":") if elf.has_rpath else None
|
||||
except ElfParsingError:
|
||||
return None
|
||||
|
||||
if not elf.has_rpath:
|
||||
|
||||
def get_interpreter(path: str) -> Optional[str]:
|
||||
"""Returns the interpreter of the given file as UTF-8 string, or None if not set."""
|
||||
try:
|
||||
with open(path, "rb") as f:
|
||||
elf = parse_elf(f, interpreter=True, dynamic_section=False)
|
||||
return elf.pt_interp_str.decode("utf-8") if elf.has_pt_interp else None
|
||||
except ElfParsingError:
|
||||
return None
|
||||
|
||||
# If it does, split the string in components
|
||||
rpath = elf.dt_rpath_str
|
||||
rpath = rpath.decode("utf-8")
|
||||
return rpath.split(":")
|
||||
|
||||
|
||||
def delete_rpath(path):
|
||||
"""Modifies a binary to remove the rpath. It zeros out the rpath string
|
||||
and also drops the DT_R(UN)PATH entry from the dynamic section, so it doesn't
|
||||
show up in 'readelf -d file', nor in 'strings file'."""
|
||||
def delete_rpath(path: str) -> None:
|
||||
"""Modifies a binary to remove the rpath. It zeros out the rpath string and also drops the
|
||||
DT_R(UN)PATH entry from the dynamic section, so it doesn't show up in 'readelf -d file', nor
|
||||
in 'strings file'."""
|
||||
with open(path, "rb+") as f:
|
||||
elf = parse_elf(f, interpreter=False, dynamic_section=True)
|
||||
|
||||
|
@ -476,75 +517,136 @@ def delete_rpath(path):
|
|||
old_offset += dynamic_array_size
|
||||
|
||||
|
||||
def replace_rpath_in_place_or_raise(path, substitutions):
|
||||
class CStringType:
|
||||
PT_INTERP = 1
|
||||
RPATH = 2
|
||||
|
||||
|
||||
class UpdateCStringAction:
|
||||
def __init__(self, old_value: bytes, new_value: bytes, offset: int):
|
||||
self.old_value = old_value
|
||||
self.new_value = new_value
|
||||
self.offset = offset
|
||||
|
||||
@property
|
||||
def inplace(self) -> bool:
|
||||
return len(self.new_value) <= len(self.old_value)
|
||||
|
||||
def apply(self, f: BinaryIO) -> None:
|
||||
assert self.inplace
|
||||
|
||||
f.seek(self.offset)
|
||||
f.write(self.new_value)
|
||||
|
||||
# We zero out the bits we shortened because (a) it should be a
|
||||
# C-string and (b) it's nice not to have spurious parts of old
|
||||
# paths in the output of `strings file`. Note that we're all
|
||||
# good when pad == 0; the original terminating null is used.
|
||||
f.write(b"\x00" * (len(self.old_value) - len(self.new_value)))
|
||||
|
||||
|
||||
def _get_rpath_substitution(
|
||||
elf: ElfFile, regex: Pattern, substitutions: Dict[bytes, bytes]
|
||||
) -> Optional[UpdateCStringAction]:
|
||||
"""Make rpath substitutions in-place."""
|
||||
# If there's no RPATH, then there's no need to replace anything.
|
||||
if not elf.has_rpath:
|
||||
return None
|
||||
|
||||
# Get the non-empty rpaths. Sometimes there's a bunch of trailing
|
||||
# colons ::::: used for padding, we don't add them back to make it
|
||||
# more likely that the string doesn't grow.
|
||||
rpaths = list(filter(len, elf.dt_rpath_str.split(b":")))
|
||||
|
||||
num_rpaths = len(rpaths)
|
||||
|
||||
if num_rpaths == 0:
|
||||
return None
|
||||
|
||||
changed = False
|
||||
for i in range(num_rpaths):
|
||||
old_rpath = rpaths[i]
|
||||
match = regex.match(old_rpath)
|
||||
if match:
|
||||
changed = True
|
||||
rpaths[i] = substitutions[match.group()] + old_rpath[match.end() :]
|
||||
|
||||
# Nothing to replace!
|
||||
if not changed:
|
||||
return None
|
||||
|
||||
return UpdateCStringAction(
|
||||
old_value=elf.dt_rpath_str,
|
||||
new_value=b":".join(rpaths),
|
||||
# The rpath is at a given offset in the string table used by the dynamic section.
|
||||
offset=elf.pt_dynamic_strtab_offset + elf.rpath_strtab_offset,
|
||||
)
|
||||
|
||||
|
||||
def _get_pt_interp_substitution(
|
||||
elf: ElfFile, regex: Pattern, substitutions: Dict[bytes, bytes]
|
||||
) -> Optional[UpdateCStringAction]:
|
||||
"""Make interpreter substitutions in-place."""
|
||||
if not elf.has_pt_interp:
|
||||
return None
|
||||
|
||||
match = regex.match(elf.pt_interp_str)
|
||||
if not match:
|
||||
return None
|
||||
|
||||
return UpdateCStringAction(
|
||||
old_value=elf.pt_interp_str,
|
||||
new_value=substitutions[match.group()] + elf.pt_interp_str[match.end() :],
|
||||
offset=elf.pt_interp_p_offset,
|
||||
)
|
||||
|
||||
|
||||
def substitute_rpath_and_pt_interp_in_place_or_raise(
|
||||
path: str, substitutions: Dict[bytes, bytes]
|
||||
) -> bool:
|
||||
"""Returns true if the rpath and interpreter were modified, false if there was nothing to do.
|
||||
Raises ElfCStringUpdatesFailed if the ELF file cannot be updated in-place. This exception
|
||||
contains a list of actions to perform with other tools. The file is left untouched in this
|
||||
case."""
|
||||
regex = re.compile(b"|".join(re.escape(p) for p in substitutions.keys()))
|
||||
|
||||
try:
|
||||
with open(path, "rb+") as f:
|
||||
elf = parse_elf(f, interpreter=False, dynamic_section=True)
|
||||
elf = parse_elf(f, interpreter=True, dynamic_section=True)
|
||||
|
||||
# If there's no RPATH, then there's no need to replace anything.
|
||||
if not elf.has_rpath:
|
||||
# Get the actions to perform.
|
||||
rpath = _get_rpath_substitution(elf, regex, substitutions)
|
||||
pt_interp = _get_pt_interp_substitution(elf, regex, substitutions)
|
||||
|
||||
# Nothing to do.
|
||||
if not rpath and not pt_interp:
|
||||
return False
|
||||
|
||||
# Get the non-empty rpaths. Sometimes there's a bunch of trailing
|
||||
# colons ::::: used for padding, we don't add them back to make it
|
||||
# more likely that the string doesn't grow.
|
||||
rpaths = list(filter(len, elf.dt_rpath_str.split(b":")))
|
||||
# If we can't update in-place, leave it to other tools, don't do partial updates.
|
||||
if rpath and not rpath.inplace or pt_interp and not pt_interp.inplace:
|
||||
raise ElfCStringUpdatesFailed(rpath, pt_interp)
|
||||
|
||||
num_rpaths = len(rpaths)
|
||||
# Otherwise, apply the updates.
|
||||
if rpath:
|
||||
rpath.apply(f)
|
||||
|
||||
if num_rpaths == 0:
|
||||
return False
|
||||
if pt_interp:
|
||||
pt_interp.apply(f)
|
||||
|
||||
changed = False
|
||||
for i in range(num_rpaths):
|
||||
old_rpath = rpaths[i]
|
||||
match = regex.match(old_rpath)
|
||||
if match:
|
||||
changed = True
|
||||
rpaths[i] = substitutions[match.group()] + old_rpath[match.end() :]
|
||||
|
||||
# Nothing to replace!
|
||||
if not changed:
|
||||
return False
|
||||
|
||||
new_rpath_string = b":".join(rpaths)
|
||||
|
||||
pad = len(elf.dt_rpath_str) - len(new_rpath_string)
|
||||
|
||||
if pad < 0:
|
||||
raise ElfDynamicSectionUpdateFailed(elf.dt_rpath_str, new_rpath_string)
|
||||
|
||||
# We zero out the bits we shortened because (a) it should be a
|
||||
# C-string and (b) it's nice not to have spurious parts of old
|
||||
# paths in the output of `strings file`. Note that we're all
|
||||
# good when pad == 0; the original terminating null is used.
|
||||
new_rpath_string += b"\x00" * pad
|
||||
|
||||
# The rpath is at a given offset in the string table used by the
|
||||
# dynamic section.
|
||||
rpath_offset = elf.pt_dynamic_strtab_offset + elf.rpath_strtab_offset
|
||||
|
||||
f.seek(rpath_offset)
|
||||
f.write(new_rpath_string)
|
||||
return True
|
||||
|
||||
except ElfParsingError:
|
||||
# This just means the file wasnt an elf file, so there's no point
|
||||
# This just means the file wasn't an elf file, so there's no point
|
||||
# in updating its rpath anyways; ignore this problem.
|
||||
return False
|
||||
|
||||
|
||||
class ElfDynamicSectionUpdateFailed(Exception):
|
||||
def __init__(self, old, new):
|
||||
self.old = old
|
||||
self.new = new
|
||||
super().__init__(
|
||||
"New rpath {} is longer than old rpath {}".format(
|
||||
new.decode("utf-8"), old.decode("utf-8")
|
||||
)
|
||||
)
|
||||
class ElfCStringUpdatesFailed(Exception):
|
||||
def __init__(
|
||||
self, rpath: Optional[UpdateCStringAction], pt_interp: Optional[UpdateCStringAction]
|
||||
):
|
||||
self.rpath = rpath
|
||||
self.pt_interp = pt_interp
|
||||
|
||||
|
||||
class ElfParsingError(Exception):
|
||||
|
|
Loading…
Reference in a new issue