build output: filter padding out of console output when padded_length is used (#24514)

Spack allows users to set `padded_length` to pad out the installation path in
build farms so that any binaries created are more easily relocatable. The issue
with this is that the padding dominates installation output and makes it
difficult to see what is going on. The padding also causes logs to easily
exceed size limits for things like GitLab artifacts.

This PR fixes this by adding a filter in the logger daemon. If you use a
setting like this:

config:
    install_tree:
        padded_length: 512

Then lines like this in the output:

==> [2021-06-23-15:59:05.020387] './configure' '--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_pla/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga

will be replaced with the much more readable:

==> [2021-06-23-15:59:05.020387] './configure' '--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-512-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga

You can see that the padding has been replaced with `[padded-to-512-chars]` to
indicate the total number of characters in the padded prefix. Over a long log
file, this should save a lot of space and allow us to see error messages in
GitHub/GitLab log output.

The *actual* build logs still have full paths in them. Also lines that are
output by Spack and not by a package build are not filtered and will still
display the fully padded path. There aren't that many of these, so the change
should still help reduce file size and readability quite a bit.
This commit is contained in:
Todd Gamblin 2021-07-12 14:48:52 -07:00 committed by GitHub
parent 34b763f792
commit f58b2e03ca
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 188 additions and 7 deletions

View file

@ -436,7 +436,7 @@ class log_output(object):
""" """
def __init__(self, file_like=None, echo=False, debug=0, buffer=False, def __init__(self, file_like=None, echo=False, debug=0, buffer=False,
env=None): env=None, filter_fn=None):
"""Create a new output log context manager. """Create a new output log context manager.
Args: Args:
@ -446,6 +446,8 @@ def __init__(self, file_like=None, echo=False, debug=0, buffer=False,
debug (int): positive to enable tty debug mode during logging debug (int): positive to enable tty debug mode during logging
buffer (bool): pass buffer=True to skip unbuffering output; note buffer (bool): pass buffer=True to skip unbuffering output; note
this doesn't set up any *new* buffering this doesn't set up any *new* buffering
filter_fn (callable, optional): Callable[str] -> str to filter each
line of output
log_output can take either a file object or a filename. If a log_output can take either a file object or a filename. If a
filename is passed, the file will be opened and closed entirely filename is passed, the file will be opened and closed entirely
@ -465,6 +467,7 @@ def __init__(self, file_like=None, echo=False, debug=0, buffer=False,
self.debug = debug self.debug = debug
self.buffer = buffer self.buffer = buffer
self.env = env # the environment to use for _writer_daemon self.env = env # the environment to use for _writer_daemon
self.filter_fn = filter_fn
self._active = False # used to prevent re-entry self._active = False # used to prevent re-entry
@ -543,7 +546,7 @@ def __enter__(self):
target=_writer_daemon, target=_writer_daemon,
args=( args=(
input_multiprocess_fd, read_multiprocess_fd, write_fd, input_multiprocess_fd, read_multiprocess_fd, write_fd,
self.echo, self.log_file, child_pipe self.echo, self.log_file, child_pipe, self.filter_fn
) )
) )
self.process.daemon = True # must set before start() self.process.daemon = True # must set before start()
@ -667,7 +670,7 @@ def force_echo(self):
def _writer_daemon(stdin_multiprocess_fd, read_multiprocess_fd, write_fd, echo, def _writer_daemon(stdin_multiprocess_fd, read_multiprocess_fd, write_fd, echo,
log_file_wrapper, control_pipe): log_file_wrapper, control_pipe, filter_fn):
"""Daemon used by ``log_output`` to write to a log file and to ``stdout``. """Daemon used by ``log_output`` to write to a log file and to ``stdout``.
The daemon receives output from the parent process and writes it both The daemon receives output from the parent process and writes it both
@ -712,6 +715,7 @@ def _writer_daemon(stdin_multiprocess_fd, read_multiprocess_fd, write_fd, echo,
log_file_wrapper (FileWrapper): file to log all output log_file_wrapper (FileWrapper): file to log all output
control_pipe (Pipe): multiprocessing pipe on which to send control control_pipe (Pipe): multiprocessing pipe on which to send control
information to the parent information to the parent
filter_fn (callable, optional): function to filter each line of output
""" """
# If this process was forked, then it will inherit file descriptors from # If this process was forked, then it will inherit file descriptors from
@ -784,7 +788,10 @@ def _writer_daemon(stdin_multiprocess_fd, read_multiprocess_fd, write_fd, echo,
# Echo to stdout if requested or forced. # Echo to stdout if requested or forced.
if echo or force_echo: if echo or force_echo:
sys.stdout.write(clean_line) output_line = clean_line
if filter_fn:
output_line = filter_fn(clean_line)
sys.stdout.write(output_line)
# Stripped output to log file. # Stripped output to log file.
log_file.write(_strip(clean_line)) log_file.write(_strip(clean_line))

View file

@ -1692,6 +1692,12 @@ def build_process(pkg, kwargs):
verbose = kwargs.get('verbose', False) verbose = kwargs.get('verbose', False)
timer = Timer() timer = Timer()
# If we are using a padded path, filter the output to compress padded paths
# The real log still has full-length paths.
filter_padding = spack.config.get("config:install_tree:padded_length", None)
filter_fn = spack.util.path.padding_filter if filter_padding else None
if not fake: if not fake:
if not skip_patch: if not skip_patch:
pkg.do_patch() pkg.do_patch()
@ -1764,8 +1770,10 @@ def build_process(pkg, kwargs):
try: try:
# DEBUGGING TIP - to debug this section, insert an IPython # DEBUGGING TIP - to debug this section, insert an IPython
# embed here, and run the sections below without log capture # embed here, and run the sections below without log capture
with log_output(log_file, echo, True, with log_output(
env=unmodified_env) as logger: log_file, echo, True, env=unmodified_env,
filter_fn=filter_fn
) as logger:
with logger.force_echo(): with logger.force_echo():
inner_debug_level = tty.debug_level() inner_debug_level = tty.debug_level()

View file

@ -78,6 +78,39 @@ def test_log_python_output_and_echo_output(capfd, tmpdir):
assert capfd.readouterr()[0] == 'force echo\n' assert capfd.readouterr()[0] == 'force echo\n'
def _log_filter_fn(string):
return string.replace("foo", "bar")
def test_log_output_with_filter(capfd, tmpdir):
with tmpdir.as_cwd():
with log_output('foo.txt', filter_fn=_log_filter_fn):
print('foo blah')
print('blah foo')
print('foo foo')
# foo.txt output is not filtered
with open('foo.txt') as f:
assert f.read() == 'foo blah\nblah foo\nfoo foo\n'
# output is not echoed
assert capfd.readouterr()[0] == ''
# now try with echo
with tmpdir.as_cwd():
with log_output('foo.txt', echo=True, filter_fn=_log_filter_fn):
print('foo blah')
print('blah foo')
print('foo foo')
# foo.txt output is still not filtered
with open('foo.txt') as f:
assert f.read() == 'foo blah\nblah foo\nfoo foo\n'
# echoed output is filtered.
assert capfd.readouterr()[0] == 'bar blah\nblah bar\nbar bar\n'
@pytest.mark.skipif(not which('echo'), reason="needs echo command") @pytest.mark.skipif(not which('echo'), reason="needs echo command")
def test_log_subproc_and_echo_output_no_capfd(capfd, tmpdir): def test_log_subproc_and_echo_output_no_capfd(capfd, tmpdir):
echo = which('echo') echo = which('echo')

View file

@ -0,0 +1,59 @@
# Copyright 2013-2021 Lawrence Livermore National Security, LLC and other
# Spack Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
import pytest
import spack.util.path as sup
#: Some lines with lots of placeholders
padded_lines = [
"==> [2021-06-23-15:59:05.020387] './configure' '--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_pla/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga", # noqa: E501
"/Users/gamblin2/Workspace/spack/lib/spack/env/clang/clang -dynamiclib -install_name /Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_pla/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga/lib/libz.1.dylib -compatibility_version 1 -current_version 1.2.11 -fPIC -O2 -fPIC -DHAVE_HIDDEN -o libz.1.2.11.dylib adler32.lo crc32.lo deflate.lo infback.lo inffast.lo inflate.lo inftrees.lo trees.lo zutil.lo compress.lo uncompr.lo gzclose.lo gzlib.lo gzread.lo gzwrite.lo -lc", # noqa: E501
"rm -f /Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_placeholder__/__spack_path_pla/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga/lib/libz.a", # noqa: E501
]
#: unpadded versions of padded_lines, with [padded-to-X-chars] replacing the padding
fixed_lines = [
"==> [2021-06-23-15:59:05.020387] './configure' '--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-512-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga", # noqa: E501
"/Users/gamblin2/Workspace/spack/lib/spack/env/clang/clang -dynamiclib -install_name /Users/gamblin2/padding-log-test/opt/[padded-to-512-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga/lib/libz.1.dylib -compatibility_version 1 -current_version 1.2.11 -fPIC -O2 -fPIC -DHAVE_HIDDEN -o libz.1.2.11.dylib adler32.lo crc32.lo deflate.lo infback.lo inffast.lo inflate.lo inftrees.lo trees.lo zutil.lo compress.lo uncompr.lo gzclose.lo gzlib.lo gzread.lo gzwrite.lo -lc", # noqa: E501
"rm -f /Users/gamblin2/padding-log-test/opt/[padded-to-512-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga/lib/libz.a", # noqa: E501
]
@pytest.mark.parametrize("padded,fixed", zip(padded_lines, fixed_lines))
def test_padding_substitution(padded, fixed):
"""Ensure that all padded lines are unpadded correctly."""
assert fixed == sup.padding_filter(padded)
def test_no_substitution():
"""Ensure that a line not containing one full path placeholder is not modified."""
partial = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_pla/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
assert sup.padding_filter(partial) == partial
def test_short_substitution():
"""Ensure that a single placeholder path component is replaced"""
short = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
short_subst = "--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-63-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
assert short_subst == sup.padding_filter(short)
def test_partial_substitution():
"""Ensure that a single placeholder path component is replaced"""
short = "--prefix=/Users/gamblin2/padding-log-test/opt/__spack_path_placeholder__/__spack_p/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
short_subst = "--prefix=/Users/gamblin2/padding-log-test/opt/[padded-to-73-chars]/darwin-bigsur-skylake/apple-clang-12.0.5/zlib-1.2.11-74mwnxgn6nujehpyyalhwizwojwn5zga'" # noqa: E501
assert short_subst == sup.padding_filter(short)
def test_longest_prefix_re():
"""Test that longest_prefix_re generates correct regular expressions."""
assert "(s(?:t(?:r(?:i(?:ng?)?)?)?)?)" == sup.longest_prefix_re(
"string", capture=True
)
assert "(?:s(?:t(?:r(?:i(?:ng?)?)?)?)?)" == sup.longest_prefix_re(
"string", capture=False
)

View file

@ -44,7 +44,11 @@
# --------------------- # ---------------------
# total -> 300 # total -> 300
SPACK_MAX_INSTALL_PATH_LENGTH = 300 SPACK_MAX_INSTALL_PATH_LENGTH = 300
SPACK_PATH_PADDING_CHARS = 'spack_path_placeholder'
#: Padded paths comprise directories with this name (or some prefix of it). :
#: It starts with two underscores to make it unlikely that prefix matches would
#: include some other component of the intallation path.
SPACK_PATH_PADDING_CHARS = '__spack_path_placeholder__'
@memoized @memoized
@ -158,3 +162,73 @@ def canonicalize_path(path):
tty.debug("Using current working directory as base for abspath") tty.debug("Using current working directory as base for abspath")
return os.path.normpath(path) return os.path.normpath(path)
def longest_prefix_re(string, capture=True):
"""Return a regular expression that matches a the longest possible prefix of string.
i.e., if the input string is ``the_quick_brown_fox``, then::
m = re.compile(longest_prefix('the_quick_brown_fox'))
m.match('the_').group(1) == 'the_'
m.match('the_quick').group(1) == 'the_quick'
m.match('the_quick_brown_fox').group(1) == 'the_quick_brown_fox'
m.match('the_xquick_brown_fox').group(1) == 'the_'
m.match('the_quickx_brown_fox').group(1) == 'the_quick'
"""
if len(string) < 2:
return string
return "(%s%s%s?)" % (
"" if capture else "?:",
string[0],
longest_prefix_re(string[1:], capture=False)
)
#: regex cache for padding_filter function
_filter_re = None
def padding_filter(string):
"""Filter used to reduce output from path padding in log output.
This turns paths like this:
/foo/bar/__spack_path_placeholder__/__spack_path_placeholder__/...
Into paths like this:
/foo/bar/[padded-to-512-chars]/...
Where ``padded-to-512-chars`` indicates that the prefix was padded with
placeholders until it hit 512 characters. The actual value of this number
depends on what the `install_tree``'s ``padded_length`` is configured to.
For a path to match and be filtered, the placeholder must appear in its
entirety at least one time. e.g., "/spack/" would not be filtered, but
"/__spack_path_placeholder__/spack/" would be.
"""
global _filter_re
pad = spack.util.path.SPACK_PATH_PADDING_CHARS
if not _filter_re:
longest_prefix = longest_prefix_re(pad)
regex = (
r"((?:/[^/\s]*)*?)" # zero or more leading non-whitespace path components
r"(/{pad})+" # the padding string repeated one or more times
r"(/{longest_prefix})?(?=/)" # trailing prefix of padding as path component
)
regex = regex.replace("/", os.sep)
regex = regex.format(pad=pad, longest_prefix=longest_prefix)
_filter_re = re.compile(regex)
def replacer(match):
return "%s%s[padded-to-%d-chars]" % (
match.group(1),
os.sep,
len(match.group(0))
)
return _filter_re.sub(replacer, string)