From f58b2e03ca5611a92f559f4b81eeae883c7b274a Mon Sep 17 00:00:00 2001 From: Todd Gamblin Date: Mon, 12 Jul 2021 14:48:52 -0700 Subject: [PATCH] build output: filter padding out of console output when `padded_length` is used (#24514) Spack allows users to set `padded_length` to pad out the installation path in build farms so that any binaries created are more easily relocatable. The issue with this is that the padding dominates installation output and makes it difficult to see what is going on. The padding also causes logs to easily exceed size limits for things like GitLab artifacts. This PR fixes this by adding a filter in the logger daemon. If you use a setting like this: config: install_tree: padded_length: 512 Then lines like this in the output: ==> [2021-06-23-15:59:05.020387] './configure' '--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_pla/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga will be replaced with the much more readable: ==> [2021-06-23-15:59:05.020387] './configure' '--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-512-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga You can see that the padding has been replaced with `[padded-to-512-chars]` to indicate the total number of characters in the padded prefix. Over a long log file, this should save a lot of space and allow us to see error messages in GitHub/GitLab log output. The *actual* build logs still have full paths in them. Also lines that are output by Spack and not by a package build are not filtered and will still display the fully padded path. There aren't that many of these, so the change should still help reduce file size and readability quite a bit. --- lib/spack/llnl/util/tty/log.py | 15 +++-- lib/spack/spack/installer.py | 12 +++- lib/spack/spack/test/llnl/util/tty/log.py | 33 ++++++++++ lib/spack/spack/test/util/path.py | 59 ++++++++++++++++++ lib/spack/spack/util/path.py | 76 ++++++++++++++++++++++- 5 files changed, 188 insertions(+), 7 deletions(-) create mode 100644 lib/spack/spack/test/util/path.py diff --git a/lib/spack/llnl/util/tty/log.py b/lib/spack/llnl/util/tty/log.py index 947e79b8c9..f081893c85 100644 --- a/lib/spack/llnl/util/tty/log.py +++ b/lib/spack/llnl/util/tty/log.py @@ -436,7 +436,7 @@ class log_output(object): """ def __init__(self, file_like=None, echo=False, debug=0, buffer=False, - env=None): + env=None, filter_fn=None): """Create a new output log context manager. Args: @@ -446,6 +446,8 @@ def __init__(self, file_like=None, echo=False, debug=0, buffer=False, debug (int): positive to enable tty debug mode during logging buffer (bool): pass buffer=True to skip unbuffering output; note this doesn't set up any *new* buffering + filter_fn (callable, optional): Callable[str] -> str to filter each + line of output log_output can take either a file object or a filename. If a filename is passed, the file will be opened and closed entirely @@ -465,6 +467,7 @@ def __init__(self, file_like=None, echo=False, debug=0, buffer=False, self.debug = debug self.buffer = buffer self.env = env # the environment to use for _writer_daemon + self.filter_fn = filter_fn self._active = False # used to prevent re-entry @@ -543,7 +546,7 @@ def __enter__(self): target=_writer_daemon, args=( input_multiprocess_fd, read_multiprocess_fd, write_fd, - self.echo, self.log_file, child_pipe + self.echo, self.log_file, child_pipe, self.filter_fn ) ) self.process.daemon = True # must set before start() @@ -667,7 +670,7 @@ def force_echo(self): def _writer_daemon(stdin_multiprocess_fd, read_multiprocess_fd, write_fd, echo, - log_file_wrapper, control_pipe): + log_file_wrapper, control_pipe, filter_fn): """Daemon used by ``log_output`` to write to a log file and to ``stdout``. The daemon receives output from the parent process and writes it both @@ -712,6 +715,7 @@ def _writer_daemon(stdin_multiprocess_fd, read_multiprocess_fd, write_fd, echo, log_file_wrapper (FileWrapper): file to log all output control_pipe (Pipe): multiprocessing pipe on which to send control information to the parent + filter_fn (callable, optional): function to filter each line of output """ # If this process was forked, then it will inherit file descriptors from @@ -784,7 +788,10 @@ def _writer_daemon(stdin_multiprocess_fd, read_multiprocess_fd, write_fd, echo, # Echo to stdout if requested or forced. if echo or force_echo: - sys.stdout.write(clean_line) + output_line = clean_line + if filter_fn: + output_line = filter_fn(clean_line) + sys.stdout.write(output_line) # Stripped output to log file. log_file.write(_strip(clean_line)) diff --git a/lib/spack/spack/installer.py b/lib/spack/spack/installer.py index c6eebf3041..0cbb71ef4b 100644 --- a/lib/spack/spack/installer.py +++ b/lib/spack/spack/installer.py @@ -1692,6 +1692,12 @@ def build_process(pkg, kwargs): verbose = kwargs.get('verbose', False) timer = Timer() + + # If we are using a padded path, filter the output to compress padded paths + # The real log still has full-length paths. + filter_padding = spack.config.get("config:install_tree:padded_length", None) + filter_fn = spack.util.path.padding_filter if filter_padding else None + if not fake: if not skip_patch: pkg.do_patch() @@ -1764,8 +1770,10 @@ def build_process(pkg, kwargs): try: # DEBUGGING TIP - to debug this section, insert an IPython # embed here, and run the sections below without log capture - with log_output(log_file, echo, True, - env=unmodified_env) as logger: + with log_output( + log_file, echo, True, env=unmodified_env, + filter_fn=filter_fn + ) as logger: with logger.force_echo(): inner_debug_level = tty.debug_level() diff --git a/lib/spack/spack/test/llnl/util/tty/log.py b/lib/spack/spack/test/llnl/util/tty/log.py index 279985001d..a65073e657 100644 --- a/lib/spack/spack/test/llnl/util/tty/log.py +++ b/lib/spack/spack/test/llnl/util/tty/log.py @@ -78,6 +78,39 @@ def test_log_python_output_and_echo_output(capfd, tmpdir): assert capfd.readouterr()[0] == 'force echo\n' +def _log_filter_fn(string): + return string.replace("foo", "bar") + + +def test_log_output_with_filter(capfd, tmpdir): + with tmpdir.as_cwd(): + with log_output('foo.txt', filter_fn=_log_filter_fn): + print('foo blah') + print('blah foo') + print('foo foo') + + # foo.txt output is not filtered + with open('foo.txt') as f: + assert f.read() == 'foo blah\nblah foo\nfoo foo\n' + + # output is not echoed + assert capfd.readouterr()[0] == '' + + # now try with echo + with tmpdir.as_cwd(): + with log_output('foo.txt', echo=True, filter_fn=_log_filter_fn): + print('foo blah') + print('blah foo') + print('foo foo') + + # foo.txt output is still not filtered + with open('foo.txt') as f: + assert f.read() == 'foo blah\nblah foo\nfoo foo\n' + + # echoed output is filtered. + assert capfd.readouterr()[0] == 'bar blah\nblah bar\nbar bar\n' + + @pytest.mark.skipif(not which('echo'), reason="needs echo command") def test_log_subproc_and_echo_output_no_capfd(capfd, tmpdir): echo = which('echo') diff --git a/lib/spack/spack/test/util/path.py b/lib/spack/spack/test/util/path.py new file mode 100644 index 0000000000..532bf03178 --- /dev/null +++ b/lib/spack/spack/test/util/path.py @@ -0,0 +1,59 @@ +# Copyright 2013-2021 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +import pytest + +import spack.util.path as sup + +#: Some lines with lots of placeholders +padded_lines = [ + "==> [2021-06-23-15:59:05.020387] './configure' '--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_pla/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga", # noqa: E501 + "/Users/gamblin2/Workspace/spack/lib/spack/env/clang/clang -dynamiclib -install_name /Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_pla/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga/lib/libz.1.dylib -compatibility_version 1 -current_version 1.2.11 -fPIC -O2 -fPIC -DHAVE_HIDDEN -o libz.1.2.11.dylib adler32.lo crc32.lo deflate.lo infback.lo inffast.lo inflate.lo inftrees.lo trees.lo zutil.lo compress.lo uncompr.lo gzclose.lo gzlib.lo gzread.lo gzwrite.lo -lc", # noqa: E501 + "rm -f /Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_pla/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga/lib/libz.a", # noqa: E501 +] + + +#: unpadded versions of padded_lines, with [padded-to-X-chars] replacing the padding +fixed_lines = [ + "==> [2021-06-23-15:59:05.020387] './configure' '--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-512-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga", # noqa: E501 + "/Users/gamblin2/Workspace/spack/lib/spack/env/clang/clang -dynamiclib -install_name /Users/gamblin2/padding-log-test/opt/[padded-to-512-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga/lib/libz.1.dylib -compatibility_version 1 -current_version 1.2.11 -fPIC -O2 -fPIC -DHAVE_HIDDEN -o libz.1.2.11.dylib adler32.lo crc32.lo deflate.lo infback.lo inffast.lo inflate.lo inftrees.lo trees.lo zutil.lo compress.lo uncompr.lo gzclose.lo gzlib.lo gzread.lo gzwrite.lo -lc", # noqa: E501 + "rm -f /Users/gamblin2/padding-log-test/opt/[padded-to-512-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga/lib/libz.a", # noqa: E501 +] + + +@pytest.mark.parametrize("padded,fixed", zip(padded_lines, fixed_lines)) +def test_padding_substitution(padded, fixed): + """Ensure that all padded lines are unpadded correctly.""" + assert fixed == sup.padding_filter(padded) + + +def test_no_substitution(): + """Ensure that a line not containing one full path placeholder is not modified.""" + partial = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_pla/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501 + assert sup.padding_filter(partial) == partial + + +def test_short_substitution(): + """Ensure that a single placeholder path component is replaced""" + short = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501 + short_subst = "--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-63-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501 + assert short_subst == sup.padding_filter(short) + + +def test_partial_substitution(): + """Ensure that a single placeholder path component is replaced""" + short = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/__spack_p/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501 + short_subst = "--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-73-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501 + assert short_subst == sup.padding_filter(short) + + +def test_longest_prefix_re(): + """Test that longest_prefix_re generates correct regular expressions.""" + assert "(s(?:t(?:r(?:i(?:ng?)?)?)?)?)" == sup.longest_prefix_re( + "string", capture=True + ) + assert "(?:s(?:t(?:r(?:i(?:ng?)?)?)?)?)" == sup.longest_prefix_re( + "string", capture=False + ) diff --git a/lib/spack/spack/util/path.py b/lib/spack/spack/util/path.py index b13814f4e3..1848af1440 100644 --- a/lib/spack/spack/util/path.py +++ b/lib/spack/spack/util/path.py @@ -44,7 +44,11 @@ # --------------------- # total -> 300 SPACK_MAX_INSTALL_PATH_LENGTH = 300 -SPACK_PATH_PADDING_CHARS = 'spack_path_placeholder' + +#: Padded paths comprise directories with this name (or some prefix of it). : +#: It starts with two underscores to make it unlikely that prefix matches would +#: include some other component of the intallation path. +SPACK_PATH_PADDING_CHARS = '__spack_path_placeholder__' @memoized @@ -158,3 +162,73 @@ def canonicalize_path(path): tty.debug("Using current working directory as base for abspath") return os.path.normpath(path) + + +def longest_prefix_re(string, capture=True): + """Return a regular expression that matches a the longest possible prefix of string. + + i.e., if the input string is ``the_quick_brown_fox``, then:: + + m = re.compile(longest_prefix('the_quick_brown_fox')) + m.match('the_').group(1) == 'the_' + m.match('the_quick').group(1) == 'the_quick' + m.match('the_quick_brown_fox').group(1) == 'the_quick_brown_fox' + m.match('the_xquick_brown_fox').group(1) == 'the_' + m.match('the_quickx_brown_fox').group(1) == 'the_quick' + + """ + if len(string) < 2: + return string + + return "(%s%s%s?)" % ( + "" if capture else "?:", + string[0], + longest_prefix_re(string[1:], capture=False) + ) + + +#: regex cache for padding_filter function +_filter_re = None + + +def padding_filter(string): + """Filter used to reduce output from path padding in log output. + + This turns paths like this: + + /foo/bar/__spack_path_placeholder__/__spack_path_placeholder__/... + + Into paths like this: + + /foo/bar/[padded-to-512-chars]/... + + Where ``padded-to-512-chars`` indicates that the prefix was padded with + placeholders until it hit 512 characters. The actual value of this number + depends on what the `install_tree``'s ``padded_length`` is configured to. + + For a path to match and be filtered, the placeholder must appear in its + entirety at least one time. e.g., "/spack/" would not be filtered, but + "/__spack_path_placeholder__/spack/" would be. + + """ + global _filter_re + + pad = spack.util.path.SPACK_PATH_PADDING_CHARS + if not _filter_re: + longest_prefix = longest_prefix_re(pad) + regex = ( + r"((?:/[^/\s]*)*?)" # zero or more leading non-whitespace path components + r"(/{pad})+" # the padding string repeated one or more times + r"(/{longest_prefix})?(?=/)" # trailing prefix of padding as path component + ) + regex = regex.replace("/", os.sep) + regex = regex.format(pad=pad, longest_prefix=longest_prefix) + _filter_re = re.compile(regex) + + def replacer(match): + return "%s%s[padded-to-%d-chars]" % ( + match.group(1), + os.sep, + len(match.group(0)) + ) + return _filter_re.sub(replacer, string)