Fix combine_phase_logs text encoding issues (#34657)

Avoid text decoding and encoding when combining log files; instead,
combine them in binary mode.

Also do a buffered copy, which is sometimes faster for large log files.
This commit is contained in:
Harmen Stoppels 2022-12-22 15:32:48 +01:00 committed by GitHub
parent 3304312b26
commit 161fbfadf4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 20 additions and 5 deletions

View file

@ -460,11 +460,10 @@ def combine_phase_logs(phase_log_files, log_path):
phase_log_files (list): a list or iterator of logs to combine
log_path (str): the path to combine them to
"""
with open(log_path, "w") as log_file:
with open(log_path, "bw") as log_file:
for phase_log_file in phase_log_files:
with open(phase_log_file, "r") as phase_log:
log_file.write(phase_log.read())
with open(phase_log_file, "br") as phase_log:
shutil.copyfileobj(phase_log, log_file)
def dump_packages(spec, path):

View file

@ -622,7 +622,7 @@ def test_combine_phase_logs(tmpdir):
# This is the output log we will combine them into
combined_log = os.path.join(str(tmpdir), "combined-out.txt")
spack.installer.combine_phase_logs(phase_log_files, combined_log)
inst.combine_phase_logs(phase_log_files, combined_log)
with open(combined_log, "r") as log_file:
out = log_file.read()
@ -631,6 +631,22 @@ def test_combine_phase_logs(tmpdir):
assert "Output from %s\n" % log_file in out
def test_combine_phase_logs_does_not_care_about_encoding(tmpdir):
    """Check that phase logs are combined byte for byte, without decoding.

    Two input files holding the same non-UTF-8 payload are combined into a
    third file, which must then contain the payload exactly twice.
    """
    # This is invalid UTF-8 at a minimum: 0xF4 opens a multi-byte sequence
    # that is never validly continued, and the trailing 0xBF bytes are stray
    # continuation bytes.
    data = b"\x00\xF4\xBF\x00\xBF\xBF"

    # Named to avoid shadowing the ``input`` builtin.
    phase_logs = [str(tmpdir.join("a")), str(tmpdir.join("b"))]
    combined_log = str(tmpdir.join("c"))

    for path in phase_logs:
        with open(path, "wb") as f:
            f.write(data)

    inst.combine_phase_logs(phase_logs, combined_log)

    # Read back in binary mode so the comparison is on raw bytes.
    with open(combined_log, "rb") as f:
        assert f.read() == data * 2
def test_check_deps_status_install_failure(install_mockery, monkeypatch):
const_arg = installer_args(["a"], {})
installer = create_installer(const_arg)