Reduce the size on disk for logs (#42122)

* Reduce the size on disk for logs

This PR does two things:

1. Store a compressed `spack-build-out.txt.gz`
2. Get rid of phase logs, as they are duplicates of
   `spack-build-out.txt`

The logs are not compressed in the stage dir, so on build failure the
workflow for users is no different.

It's just that on install the logs are rarely used, and if needed, users
can easily `gzip -d` or `zgrep` them.

In the case of GCC installs, the compressed logs are <5% of the original
size, which is typically dozens of MBs.

* get rid of "backwards compat" of file names in stage dirs
This commit is contained in:
Harmen Stoppels 2024-01-19 01:02:11 +01:00 committed by GitHub
parent 2d37d5474d
commit 4d54688782
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 39 additions and 83 deletions

View file

@ -290,11 +290,11 @@ def require_user_confirmation_for_overwrite(concrete_specs, args):
def _dump_log_on_error(e: spack.build_environment.InstallError):
e.print_context()
assert e.pkg, "Expected InstallError to include the associated package"
if not os.path.exists(e.pkg.build_log_path):
if not os.path.exists(e.pkg.log_path):
tty.error("'spack install' created no log.")
else:
sys.stderr.write("Full build log:\n")
with open(e.pkg.build_log_path, errors="replace") as log:
with open(e.pkg.log_path, errors="replace") as log:
shutil.copyfileobj(log, sys.stderr)

View file

@ -1834,19 +1834,18 @@ def _get_overwrite_specs(self):
]
def _install_log_links(self, spec):
if not spec.external:
# Make sure log directory exists
log_path = self.log_path
fs.mkdirp(log_path)
if spec.external:
return
# Make sure log directory exists
log_path = self.log_path
fs.mkdirp(log_path)
with fs.working_dir(self.path):
# Link the resulting log file into logs dir
build_log_link = os.path.join(
log_path, "%s-%s.log" % (spec.name, spec.dag_hash(7))
)
if os.path.lexists(build_log_link):
os.remove(build_log_link)
symlink(spec.package.build_log_path, build_log_link)
with fs.working_dir(self.path):
# Link the resulting log file into logs dir
build_log_link = os.path.join(log_path, f"{spec.name}-{spec.dag_hash(7)}.log")
if os.path.lexists(build_log_link):
os.remove(build_log_link)
symlink(spec.package.install_log_path, build_log_link)
def _partition_roots_by_install_status(self):
"""Partition root specs into those that do not have to be passed to the

View file

@ -36,6 +36,7 @@
import sys
import time
from collections import defaultdict
from gzip import GzipFile
from typing import Dict, Iterator, List, Optional, Set, Tuple
import llnl.util.filesystem as fs
@ -638,13 +639,12 @@ def archive_install_logs(pkg: "spack.package_base.PackageBase", phase_log_dir: s
pkg: the package that was built and installed
phase_log_dir: path to the archive directory
"""
# Archive the whole stdout + stderr for the package
fs.install(pkg.log_path, pkg.install_log_path)
# Archive all phase log paths
for phase_log in pkg.phase_log_files:
log_file = os.path.basename(phase_log)
fs.install(phase_log, os.path.join(phase_log_dir, log_file))
# Copy a compressed version of the install log
with open(pkg.log_path, "rb") as f, open(pkg.install_log_path, "wb") as g:
# Use GzipFile directly so we can omit filename / mtime in header
gzip_file = GzipFile(filename="", mode="wb", compresslevel=6, mtime=0, fileobj=g)
shutil.copyfileobj(f, gzip_file)
gzip_file.close()
# Archive the install-phase test log, if present
pkg.archive_install_test_log()

View file

@ -1131,13 +1131,7 @@ def stage(self, stage):
@property
def env_path(self):
"""Return the build environment file path associated with staging."""
# Backward compatibility: Return the name of an existing log path;
# otherwise, return the current install env path name.
old_filename = os.path.join(self.stage.path, "spack-build.env")
if os.path.exists(old_filename):
return old_filename
else:
return os.path.join(self.stage.path, _spack_build_envfile)
return os.path.join(self.stage.path, _spack_build_envfile)
@property
def env_mods_path(self):
@ -1168,13 +1162,6 @@ def install_env_path(self):
@property
def log_path(self):
"""Return the build log file path associated with staging."""
# Backward compatibility: Return the name of an existing log path.
for filename in ["spack-build.out", "spack-build.txt"]:
old_log = os.path.join(self.stage.path, filename)
if os.path.exists(old_log):
return old_log
# Otherwise, return the current log path name.
return os.path.join(self.stage.path, _spack_build_logfile)
@property
@ -1187,15 +1174,15 @@ def phase_log_files(self):
@property
def install_log_path(self):
"""Return the build log file path on successful installation."""
"""Return the (compressed) build log file path on successful installation"""
# Backward compatibility: Return the name of an existing install log.
for filename in ["build.out", "build.txt"]:
for filename in [_spack_build_logfile, "build.out", "build.txt"]:
old_log = os.path.join(self.metadata_dir, filename)
if os.path.exists(old_log):
return old_log
# Otherwise, return the current install log path name.
return os.path.join(self.metadata_dir, _spack_build_logfile)
return os.path.join(self.metadata_dir, _spack_build_logfile + ".gz")
@property
def configure_args_path(self):
@ -2088,15 +2075,6 @@ def unit_test_check(self):
"""
return True
@property
def build_log_path(self):
"""
Return the expected (or current) build log file path. The path points
to the staging build file until the software is successfully installed,
when it points to the file in the installation directory.
"""
return self.install_log_path if self.spec.installed else self.log_path
@classmethod
def inject_flags(cls: Type[Pb], name: str, flags: Iterable[str]) -> FLAG_HANDLER_RETURN_TYPE:
"""

View file

@ -6,6 +6,7 @@
import collections
import contextlib
import functools
import gzip
import os
import time
import traceback
@ -190,9 +191,13 @@ def on_success(self, pkg, kwargs, package_record):
def fetch_log(self, pkg):
try:
with open(pkg.build_log_path, "r", encoding="utf-8") as stream:
return "".join(stream.readlines())
except Exception:
if os.path.exists(pkg.install_log_path):
stream = gzip.open(pkg.install_log_path, "rt")
else:
stream = open(pkg.log_path)
with stream as f:
return f.read()
except OSError:
return f"Cannot open log for {pkg.spec.cshort_spec}"
def extract_package_from_signature(self, instance, *args, **kwargs):

View file

@ -5,6 +5,7 @@
import argparse
import builtins
import filecmp
import gzip
import itertools
import os
import pathlib
@ -137,8 +138,7 @@ def test_package_output(tmpdir, capsys, install_mockery, mock_fetch):
pkg = spec.package
pkg.do_install(verbose=True)
log_file = pkg.build_log_path
with open(log_file) as f:
with gzip.open(pkg.install_log_path, "rt") as f:
out = f.read()
# make sure that output from the actual package file appears in the

View file

@ -568,8 +568,8 @@ def mock_repo_path():
def _pkg_install_fn(pkg, spec, prefix):
# sanity_check_prefix requires something in the install directory
mkdirp(prefix.bin)
if not os.path.exists(spec.package.build_log_path):
touchp(spec.package.build_log_path)
if not os.path.exists(spec.package.install_log_path):
touchp(spec.package.install_log_path)
@pytest.fixture

View file

@ -444,41 +444,15 @@ def test_nosource_pkg_install_post_install(install_mockery, mock_fetch, mock_pac
def test_pkg_build_paths(install_mockery):
# Get a basic concrete spec for the trivial install package.
spec = Spec("trivial-install-test-package").concretized()
log_path = spec.package.log_path
assert log_path.endswith(_spack_build_logfile)
env_path = spec.package.env_path
assert env_path.endswith(_spack_build_envfile)
# Backward compatibility checks
log_dir = os.path.dirname(log_path)
fs.mkdirp(log_dir)
with fs.working_dir(log_dir):
# Start with the older of the previous log filenames
older_log = "spack-build.out"
fs.touch(older_log)
assert spec.package.log_path.endswith(older_log)
# Now check the newer log filename
last_log = "spack-build.txt"
fs.rename(older_log, last_log)
assert spec.package.log_path.endswith(last_log)
# Check the old environment file
last_env = "spack-build.env"
fs.rename(last_log, last_env)
assert spec.package.env_path.endswith(last_env)
# Cleanup
shutil.rmtree(log_dir)
assert spec.package.log_path.endswith(_spack_build_logfile)
assert spec.package.env_path.endswith(_spack_build_envfile)
def test_pkg_install_paths(install_mockery):
# Get a basic concrete spec for the trivial install package.
spec = Spec("trivial-install-test-package").concretized()
log_path = os.path.join(spec.prefix, ".spack", _spack_build_logfile)
log_path = os.path.join(spec.prefix, ".spack", _spack_build_logfile + ".gz")
assert spec.package.install_log_path == log_path
env_path = os.path.join(spec.prefix, ".spack", _spack_build_envfile)