From 1db6cd5d16a084bffbc319467d70942d0307cc9f Mon Sep 17 00:00:00 2001 From: Harmen Stoppels Date: Tue, 30 Mar 2021 21:03:50 +0200 Subject: [PATCH] Make -j flag less exceptional (#22360) * Make -j flag less exceptional The -j flag in spack behaves differently from make, ctest, ninja, etc, because it caps the number of jobs to an arbitrary number 16. Spack will behave like other tools if `spack install` uses a reasonable default, and `spack install -j ` *overrides* that default. This will be particularly useful for Spack usage outside of a traditional HPC context and for HPC centers that encourage users to compile on login nodes with many cores instead of on compute nodes, which has become increasingly common as individual nodes have more cores. This maintains the existing default value of min(num_cpus, 16). However, as it is right now, Spack does a poor job at determining the number of cpus on linux, since it doesn't take cgroups into account. This is particularly problematic when using distributed builds with slurm. This PR also introduces `spack.util.cpus.cpus_available()` to consolidate knowledge on determining the number of available cores, and improves core detection for linux. This should also improve core detection for Docker/ Kubernetes, which also use cgroups. --- etc/spack/defaults/config.yaml | 12 ++++--- lib/spack/docs/config_yaml.rst | 16 +++++---- lib/spack/spack/build_environment.py | 38 +++++++++++++++++--- lib/spack/spack/cmd/common/arguments.py | 13 ------- lib/spack/spack/config.py | 4 +-- lib/spack/spack/test/build_environment.py | 21 ++++++++++- lib/spack/spack/test/cmd/common/arguments.py | 17 +++------ lib/spack/spack/util/cpus.py | 20 +++++++++++ 8 files changed, 97 insertions(+), 44 deletions(-) create mode 100644 lib/spack/spack/util/cpus.py diff --git a/etc/spack/defaults/config.yaml b/etc/spack/defaults/config.yaml index 7f0b9276ba..61c23f42df 100644 --- a/etc/spack/defaults/config.yaml +++ b/etc/spack/defaults/config.yaml @@ -142,11 +142,13 @@ config: locks: true - # The maximum number of jobs to use when running `make` in parallel, - # always limited by the number of cores available. For instance: - # - If set to 16 on a 4 cores machine `spack install` will run `make -j4` - # - If set to 16 on a 18 cores machine `spack install` will run `make -j16` - # If not set, Spack will use all available cores up to 16. + # The maximum number of jobs to use for the build system (e.g. `make`), when + # the -j flag is not given on the command line. Defaults to 16 when not set. + # Note that the maximum number of jobs is limited by the number of cores + # available, taking thread affinity into account when supported. For instance: + # - With `build_jobs: 16` and 4 cores available `spack install` will run `make -j4` + # - With `build_jobs: 16` and 32 cores available `spack install` will run `make -j16` + # - With `build_jobs: 2` and 4 cores available `spack install -j6` will run `make -j6` # build_jobs: 16 diff --git a/lib/spack/docs/config_yaml.rst b/lib/spack/docs/config_yaml.rst index 66df916193..631b8f6b1b 100644 --- a/lib/spack/docs/config_yaml.rst +++ b/lib/spack/docs/config_yaml.rst @@ -202,21 +202,23 @@ of builds. Unless overridden in a package or on the command line, Spack builds all packages in parallel. The default parallelism is equal to the number of -cores on your machine, up to 16. Parallelism cannot exceed the number of -cores available on the host. For a build system that uses Makefiles, this -means running: +cores available to the process, up to 16 (the default of ``build_jobs``). +For a build system that uses Makefiles, this ``spack install`` runs: - ``make -j``, when ``build_jobs`` is less than the number of - cores on the machine + cores available - ``make -j``, when ``build_jobs`` is greater or equal to the - number of cores on the machine + number of cores available If you work on a shared login node or have a strict ulimit, it may be necessary to set the default to a lower value. By setting ``build_jobs`` to 4, for example, commands like ``spack install`` will run ``make -j4`` -instead of hogging every core. +instead of hogging every core. To build all software in serial, +set ``build_jobs`` to 1. -To build all software in serial, set ``build_jobs`` to 1. +Note that specifying the number of jobs on the command line always takes +priority, so that ``spack install -j`` always runs `make -j`, even +when that exceeds the number of cores available. -------------------- ``ccache`` diff --git a/lib/spack/spack/build_environment.py b/lib/spack/spack/build_environment.py index f7fd76d82a..14bd7babf0 100644 --- a/lib/spack/spack/build_environment.py +++ b/lib/spack/spack/build_environment.py @@ -70,8 +70,7 @@ from spack.util.executable import Executable from spack.util.module_cmd import load_module, path_from_modules, module from spack.util.log_parse import parse_log_events, make_log_context - - +from spack.util.cpus import cpus_available # # This can be set by the user to globally disable parallel builds. # @@ -452,6 +451,38 @@ def set_build_environment_variables(pkg, env, dirty): return env +def determine_number_of_jobs( + parallel=False, command_line=None, config_default=None, max_cpus=None): + """ + Packages that require sequential builds need 1 job. Otherwise we use the + number of jobs set on the command line. If not set, then we use the config + defaults (which is usually set through the builtin config scope), but we + cap to the number of CPUs available to avoid oversubscription. + + Parameters: + parallel (bool): true when package supports parallel builds + command_line (int/None): command line override + config_default (int/None): config default number of jobs + max_cpus (int/None): maximum number of CPUs available. When None, this + value is automatically determined. + """ + if not parallel: + return 1 + + if command_line is None and 'command_line' in spack.config.scopes(): + command_line = spack.config.get('config:build_jobs', scope='command_line') + + if command_line is not None: + return command_line + + max_cpus = max_cpus or cpus_available() + + # in some rare cases _builtin config may not be set, so default to max 16 + config_default = config_default or spack.config.get('config:build_jobs', 16) + + return min(max_cpus, config_default) + + def _set_variables_for_single_module(pkg, module): """Helper function to set module variables for single module.""" # Put a marker on this module so that it won't execute the body of this @@ -460,8 +491,7 @@ def _set_variables_for_single_module(pkg, module): if getattr(module, marker, False): return - jobs = spack.config.get('config:build_jobs', 16) if pkg.parallel else 1 - jobs = min(jobs, multiprocessing.cpu_count()) + jobs = determine_number_of_jobs(parallel=pkg.parallel) m = module m.make_jobs = jobs diff --git a/lib/spack/spack/cmd/common/arguments.py b/lib/spack/spack/cmd/common/arguments.py index e6a0edd599..759fdfc498 100644 --- a/lib/spack/spack/cmd/common/arguments.py +++ b/lib/spack/spack/cmd/common/arguments.py @@ -5,7 +5,6 @@ import argparse -import multiprocessing import spack.cmd import spack.config @@ -102,22 +101,10 @@ def __call__(self, parser, namespace, jobs, option_string): '[expected a positive integer, got "{1}"]' raise ValueError(msg.format(option_string, jobs)) - jobs = min(jobs, multiprocessing.cpu_count()) spack.config.set('config:build_jobs', jobs, scope='command_line') setattr(namespace, 'jobs', jobs) - @property - def default(self): - # This default is coded as a property so that look-up - # of this value is done only on demand - return min(spack.config.get('config:build_jobs', 16), - multiprocessing.cpu_count()) - - @default.setter - def default(self, value): - pass - class DeptypeAction(argparse.Action): """Creates a tuple of valid dependency types from a deptype argument.""" diff --git a/lib/spack/spack/config.py b/lib/spack/spack/config.py index 4f7e61ea69..63839cfb05 100644 --- a/lib/spack/spack/config.py +++ b/lib/spack/spack/config.py @@ -35,7 +35,6 @@ import os import re import sys -import multiprocessing from contextlib import contextmanager from six import iteritems from ordereddict_backport import OrderedDict @@ -61,6 +60,7 @@ import spack.schema.upstreams import spack.schema.env from spack.error import SpackError +from spack.util.cpus import cpus_available # Hacked yaml for configuration files preserves line numbers. import spack.util.spack_yaml as syaml @@ -110,7 +110,7 @@ 'verify_ssl': True, 'checksum': True, 'dirty': False, - 'build_jobs': min(16, multiprocessing.cpu_count()), + 'build_jobs': min(16, cpus_available()), 'build_stage': '$tempdir/spack-stage', 'concretizer': 'original', } diff --git a/lib/spack/spack/test/build_environment.py b/lib/spack/spack/test/build_environment.py index 28f6aefcb8..841a723f96 100644 --- a/lib/spack/spack/test/build_environment.py +++ b/lib/spack/spack/test/build_environment.py @@ -13,9 +13,9 @@ import spack.spec from spack.paths import build_env_path from spack.build_environment import dso_suffix, _static_to_shared_library +from spack.build_environment import determine_number_of_jobs from spack.util.executable import Executable from spack.util.environment import EnvironmentModifications - from llnl.util.filesystem import LibraryList, HeaderList @@ -339,3 +339,22 @@ def test_setting_dtags_based_on_config( dtags_to_add = modifications['SPACK_DTAGS_TO_ADD'][0] assert dtags_to_add.value == expected_flag + + +def test_build_jobs_sequential_is_sequential(): + assert determine_number_of_jobs( + parallel=False, command_line=8, config_default=8, max_cpus=8) == 1 + + +def test_build_jobs_command_line_overrides(): + assert determine_number_of_jobs( + parallel=True, command_line=10, config_default=1, max_cpus=1) == 10 + assert determine_number_of_jobs( + parallel=True, command_line=10, config_default=100, max_cpus=100) == 10 + + +def test_build_jobs_defaults(): + assert determine_number_of_jobs( + parallel=True, command_line=None, config_default=1, max_cpus=10) == 1 + assert determine_number_of_jobs( + parallel=True, command_line=None, config_default=100, max_cpus=10) == 10 diff --git a/lib/spack/spack/test/cmd/common/arguments.py b/lib/spack/spack/test/cmd/common/arguments.py index 2a290d5d43..27648ee383 100644 --- a/lib/spack/spack/test/cmd/common/arguments.py +++ b/lib/spack/spack/test/cmd/common/arguments.py @@ -4,11 +4,9 @@ # SPDX-License-Identifier: (Apache-2.0 OR MIT) import argparse -import multiprocessing import pytest - import spack.cmd import spack.cmd.common.arguments as arguments import spack.config @@ -27,20 +25,15 @@ def job_parser(): yield p -@pytest.mark.parametrize("ncores", [1, 2, 4, 8, 16, 32]) -def test_setting_jobs_flag(job_parser, ncores, monkeypatch): - monkeypatch.setattr(multiprocessing, 'cpu_count', lambda: ncores) +def test_setting_jobs_flag(job_parser): namespace = job_parser.parse_args(['-j', '24']) - expected = min(24, ncores) - assert namespace.jobs == expected - assert spack.config.get('config:build_jobs') == expected + assert namespace.jobs == 24 + assert spack.config.get('config:build_jobs', scope='command_line') == 24 -@pytest.mark.parametrize("ncores", [1, 2, 4, 8, 16, 32]) -def test_omitted_job_flag(job_parser, ncores, monkeypatch): - monkeypatch.setattr(multiprocessing, 'cpu_count', lambda: ncores) +def test_omitted_job_flag(job_parser): namespace = job_parser.parse_args([]) - assert namespace.jobs == min(ncores, 16) + assert namespace.jobs is None assert spack.config.get('config:build_jobs') is None diff --git a/lib/spack/spack/util/cpus.py b/lib/spack/spack/util/cpus.py new file mode 100644 index 0000000000..9fa10c87a8 --- /dev/null +++ b/lib/spack/spack/util/cpus.py @@ -0,0 +1,20 @@ +# Copyright 2013-2021 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +import os +import multiprocessing + + +def cpus_available(): + """ + Returns the number of CPUs available for the current process, or the number + of phyiscal CPUs when that information cannot be retrieved. The number + of available CPUs might differ from the number of physical CPUs when + using spack through Slurm or container runtimes. + """ + try: + return len(os.sched_getaffinity(0)) # novermin + except Exception: + return multiprocessing.cpu_count()