From 4d546887825311c9c1b6f373b157083934753dc4 Mon Sep 17 00:00:00 2001 From: Harmen Stoppels Date: Fri, 19 Jan 2024 01:02:11 +0100 Subject: [PATCH] Reduce the size on disk for logs (#42122) * Reduce the size on disk for logs This PR does two things: 1. Store a compressed `spack-build-out.txt.gz` 2. Get rid of phase logs, as they are duplicates of `spack-build-out.txt` The logs are not compressed in the stage dir, so on build failure the workflow for users is no different. It's just that on install the logs are rarely used, and if needed, users can easily `gzip -d` or `zgrep` them. In the case of GCC installs, the compressed logs are <5% of the original size, which is typically dozens of MBs. * get rid of "backwards compat" of file names in stage dirs --- lib/spack/spack/cmd/install.py | 4 +-- lib/spack/spack/environment/environment.py | 23 ++++++++-------- lib/spack/spack/installer.py | 14 +++++----- lib/spack/spack/package_base.py | 30 +++----------------- lib/spack/spack/report.py | 11 ++++++-- lib/spack/spack/test/cmd/install.py | 4 +-- lib/spack/spack/test/conftest.py | 4 +-- lib/spack/spack/test/install.py | 32 ++-------------------- 8 files changed, 39 insertions(+), 83 deletions(-) diff --git a/lib/spack/spack/cmd/install.py b/lib/spack/spack/cmd/install.py index 288d2cc354..9c66c661a0 100644 --- a/lib/spack/spack/cmd/install.py +++ b/lib/spack/spack/cmd/install.py @@ -290,11 +290,11 @@ def require_user_confirmation_for_overwrite(concrete_specs, args): def _dump_log_on_error(e: spack.build_environment.InstallError): e.print_context() assert e.pkg, "Expected InstallError to include the associated package" - if not os.path.exists(e.pkg.build_log_path): + if not os.path.exists(e.pkg.log_path): tty.error("'spack install' created no log.") else: sys.stderr.write("Full build log:\n") - with open(e.pkg.build_log_path, errors="replace") as log: + with open(e.pkg.log_path, errors="replace") as log: shutil.copyfileobj(log, sys.stderr) diff --git a/lib/spack/spack/environment/environment.py b/lib/spack/spack/environment/environment.py index cacc938985..320cf4d2b8 100644 --- a/lib/spack/spack/environment/environment.py +++ b/lib/spack/spack/environment/environment.py @@ -1834,19 +1834,18 @@ def _get_overwrite_specs(self): ] def _install_log_links(self, spec): - if not spec.external: - # Make sure log directory exists - log_path = self.log_path - fs.mkdirp(log_path) + if spec.external: + return + # Make sure log directory exists + log_path = self.log_path + fs.mkdirp(log_path) - with fs.working_dir(self.path): - # Link the resulting log file into logs dir - build_log_link = os.path.join( - log_path, "%s-%s.log" % (spec.name, spec.dag_hash(7)) - ) - if os.path.lexists(build_log_link): - os.remove(build_log_link) - symlink(spec.package.build_log_path, build_log_link) + with fs.working_dir(self.path): + # Link the resulting log file into logs dir + build_log_link = os.path.join(log_path, f"{spec.name}-{spec.dag_hash(7)}.log") + if os.path.lexists(build_log_link): + os.remove(build_log_link) + symlink(spec.package.install_log_path, build_log_link) def _partition_roots_by_install_status(self): """Partition root specs into those that do not have to be passed to the diff --git a/lib/spack/spack/installer.py b/lib/spack/spack/installer.py index 0673bdb1d7..b2d2da0d8a 100644 --- a/lib/spack/spack/installer.py +++ b/lib/spack/spack/installer.py @@ -36,6 +36,7 @@ import sys import time from collections import defaultdict +from gzip import GzipFile from typing import Dict, Iterator, List, Optional, Set, Tuple import llnl.util.filesystem as fs @@ -638,13 +639,12 @@ def archive_install_logs(pkg: "spack.package_base.PackageBase", phase_log_dir: s pkg: the package that was built and installed phase_log_dir: path to the archive directory """ - # Archive the whole stdout + stderr for the package - fs.install(pkg.log_path, pkg.install_log_path) - - # Archive all phase log paths - for phase_log in pkg.phase_log_files: - log_file = os.path.basename(phase_log) - fs.install(phase_log, os.path.join(phase_log_dir, log_file)) + # Copy a compressed version of the install log + with open(pkg.log_path, "rb") as f, open(pkg.install_log_path, "wb") as g: + # Use GzipFile directly so we can omit filename / mtime in header + gzip_file = GzipFile(filename="", mode="wb", compresslevel=6, mtime=0, fileobj=g) + shutil.copyfileobj(f, gzip_file) + gzip_file.close() # Archive the install-phase test log, if present pkg.archive_install_test_log() diff --git a/lib/spack/spack/package_base.py b/lib/spack/spack/package_base.py index d23a620984..cc0af82ce5 100644 --- a/lib/spack/spack/package_base.py +++ b/lib/spack/spack/package_base.py @@ -1131,13 +1131,7 @@ def stage(self, stage): @property def env_path(self): """Return the build environment file path associated with staging.""" - # Backward compatibility: Return the name of an existing log path; - # otherwise, return the current install env path name. - old_filename = os.path.join(self.stage.path, "spack-build.env") - if os.path.exists(old_filename): - return old_filename - else: - return os.path.join(self.stage.path, _spack_build_envfile) + return os.path.join(self.stage.path, _spack_build_envfile) @property def env_mods_path(self): @@ -1168,13 +1162,6 @@ def install_env_path(self): @property def log_path(self): """Return the build log file path associated with staging.""" - # Backward compatibility: Return the name of an existing log path. - for filename in ["spack-build.out", "spack-build.txt"]: - old_log = os.path.join(self.stage.path, filename) - if os.path.exists(old_log): - return old_log - - # Otherwise, return the current log path name. return os.path.join(self.stage.path, _spack_build_logfile) @property @@ -1187,15 +1174,15 @@ def phase_log_files(self): @property def install_log_path(self): - """Return the build log file path on successful installation.""" + """Return the (compressed) build log file path on successful installation""" # Backward compatibility: Return the name of an existing install log. - for filename in ["build.out", "build.txt"]: + for filename in [_spack_build_logfile, "build.out", "build.txt"]: old_log = os.path.join(self.metadata_dir, filename) if os.path.exists(old_log): return old_log # Otherwise, return the current install log path name. - return os.path.join(self.metadata_dir, _spack_build_logfile) + return os.path.join(self.metadata_dir, _spack_build_logfile + ".gz") @property def configure_args_path(self): @@ -2088,15 +2075,6 @@ def unit_test_check(self): """ return True - @property - def build_log_path(self): - """ - Return the expected (or current) build log file path. The path points - to the staging build file until the software is successfully installed, - when it points to the file in the installation directory. - """ - return self.install_log_path if self.spec.installed else self.log_path - @classmethod def inject_flags(cls: Type[Pb], name: str, flags: Iterable[str]) -> FLAG_HANDLER_RETURN_TYPE: """ diff --git a/lib/spack/spack/report.py b/lib/spack/spack/report.py index 8e14747d9a..9c56e7edbe 100644 --- a/lib/spack/spack/report.py +++ b/lib/spack/spack/report.py @@ -6,6 +6,7 @@ import collections import contextlib import functools +import gzip import os import time import traceback @@ -190,9 +191,13 @@ def on_success(self, pkg, kwargs, package_record): def fetch_log(self, pkg): try: - with open(pkg.build_log_path, "r", encoding="utf-8") as stream: - return "".join(stream.readlines()) - except Exception: + if os.path.exists(pkg.install_log_path): + stream = gzip.open(pkg.install_log_path, "rt") + else: + stream = open(pkg.log_path) + with stream as f: + return f.read() + except OSError: return f"Cannot open log for {pkg.spec.cshort_spec}" def extract_package_from_signature(self, instance, *args, **kwargs): diff --git a/lib/spack/spack/test/cmd/install.py b/lib/spack/spack/test/cmd/install.py index e304e1a627..17a35b8973 100644 --- a/lib/spack/spack/test/cmd/install.py +++ b/lib/spack/spack/test/cmd/install.py @@ -5,6 +5,7 @@ import argparse import builtins import filecmp +import gzip import itertools import os import pathlib @@ -137,8 +138,7 @@ def test_package_output(tmpdir, capsys, install_mockery, mock_fetch): pkg = spec.package pkg.do_install(verbose=True) - log_file = pkg.build_log_path - with open(log_file) as f: + with gzip.open(pkg.install_log_path, "rt") as f: out = f.read() # make sure that output from the actual package file appears in the diff --git a/lib/spack/spack/test/conftest.py b/lib/spack/spack/test/conftest.py index d5c05e2bfe..1f6e054232 100644 --- a/lib/spack/spack/test/conftest.py +++ b/lib/spack/spack/test/conftest.py @@ -568,8 +568,8 @@ def mock_repo_path(): def _pkg_install_fn(pkg, spec, prefix): # sanity_check_prefix requires something in the install directory mkdirp(prefix.bin) - if not os.path.exists(spec.package.build_log_path): - touchp(spec.package.build_log_path) + if not os.path.exists(spec.package.install_log_path): + touchp(spec.package.install_log_path) @pytest.fixture diff --git a/lib/spack/spack/test/install.py b/lib/spack/spack/test/install.py index bbfeff3639..2b65bc9cfb 100644 --- a/lib/spack/spack/test/install.py +++ b/lib/spack/spack/test/install.py @@ -444,41 +444,15 @@ def test_nosource_pkg_install_post_install(install_mockery, mock_fetch, mock_pac def test_pkg_build_paths(install_mockery): # Get a basic concrete spec for the trivial install package. spec = Spec("trivial-install-test-package").concretized() - - log_path = spec.package.log_path - assert log_path.endswith(_spack_build_logfile) - - env_path = spec.package.env_path - assert env_path.endswith(_spack_build_envfile) - - # Backward compatibility checks - log_dir = os.path.dirname(log_path) - fs.mkdirp(log_dir) - with fs.working_dir(log_dir): - # Start with the older of the previous log filenames - older_log = "spack-build.out" - fs.touch(older_log) - assert spec.package.log_path.endswith(older_log) - - # Now check the newer log filename - last_log = "spack-build.txt" - fs.rename(older_log, last_log) - assert spec.package.log_path.endswith(last_log) - - # Check the old environment file - last_env = "spack-build.env" - fs.rename(last_log, last_env) - assert spec.package.env_path.endswith(last_env) - - # Cleanup - shutil.rmtree(log_dir) + assert spec.package.log_path.endswith(_spack_build_logfile) + assert spec.package.env_path.endswith(_spack_build_envfile) def test_pkg_install_paths(install_mockery): # Get a basic concrete spec for the trivial install package. spec = Spec("trivial-install-test-package").concretized() - log_path = os.path.join(spec.prefix, ".spack", _spack_build_logfile) + log_path = os.path.join(spec.prefix, ".spack", _spack_build_logfile + ".gz") assert spec.package.install_log_path == log_path env_path = os.path.join(spec.prefix, ".spack", _spack_build_envfile)