Make -j flag less exceptional (#22360)
* Make -j flag less exceptional

The -j flag in spack behaves differently from make, ctest, ninja, etc., because it caps the number of jobs to an arbitrary number, 16. Spack will behave like other tools if `spack install` uses a reasonable default and `spack install -j <num>` *overrides* that default.

This will be particularly useful for Spack usage outside of a traditional HPC context, and for HPC centers that encourage users to compile on login nodes with many cores instead of on compute nodes, which has become increasingly common as individual nodes gain more cores.

This maintains the existing default value of min(num_cpus, 16). However, as it stands, Spack does a poor job of determining the number of CPUs on Linux, since it doesn't take cgroups into account. This is particularly problematic when using distributed builds with Slurm.

This PR therefore also introduces `spack.util.cpus.cpus_available()` to consolidate knowledge on determining the number of available cores, and improves core detection for Linux. This should also improve core detection under Docker and Kubernetes, which likewise use cgroups.
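For illustration, a minimal sketch of the detection gap described above (not Spack code; the real helper added by this PR is `cpus_available()` in `lib/spack/spack/util/cpus.py` below):

```python
import multiprocessing
import os

# multiprocessing.cpu_count() reports every core in the machine, even when a
# cgroup (Slurm, Docker, Kubernetes) or taskset pins the process to a subset.
# os.sched_getaffinity(0) (Linux-only) reports only the cores the process may
# actually use.
print(multiprocessing.cpu_count())   # e.g. 64 on a 64-core node
print(len(os.sched_getaffinity(0)))  # e.g. 4 when run under `taskset -c 0-3`
```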
parent d3a9824ea2
commit 1db6cd5d16
8 changed files with 97 additions and 44 deletions
@@ -142,11 +142,13 @@ config:
   locks: true


-  # The maximum number of jobs to use when running `make` in parallel,
-  # always limited by the number of cores available. For instance:
-  # - If set to 16 on a 4 cores machine `spack install` will run `make -j4`
-  # - If set to 16 on a 18 cores machine `spack install` will run `make -j16`
-  # If not set, Spack will use all available cores up to 16.
+  # The maximum number of jobs to use for the build system (e.g. `make`), when
+  # the -j flag is not given on the command line. Defaults to 16 when not set.
+  # Note that the maximum number of jobs is limited by the number of cores
+  # available, taking thread affinity into account when supported. For instance:
+  # - With `build_jobs: 16` and 4 cores available `spack install` will run `make -j4`
+  # - With `build_jobs: 16` and 32 cores available `spack install` will run `make -j16`
+  # - With `build_jobs: 2` and 4 cores available `spack install -j6` will run `make -j6`
   # build_jobs: 16

@@ -202,21 +202,23 @@ of builds.

 Unless overridden in a package or on the command line, Spack builds all
 packages in parallel. The default parallelism is equal to the number of
-cores on your machine, up to 16. Parallelism cannot exceed the number of
-cores available on the host. For a build system that uses Makefiles, this
-means running:
+cores available to the process, up to 16 (the default of ``build_jobs``).
+For a build system that uses Makefiles, this means ``spack install`` runs:

 - ``make -j<build_jobs>``, when ``build_jobs`` is less than the number of
-  cores on the machine
+  cores available
 - ``make -j<ncores>``, when ``build_jobs`` is greater or equal to the
-  number of cores on the machine
+  number of cores available

 If you work on a shared login node or have a strict ulimit, it may be
 necessary to set the default to a lower value. By setting ``build_jobs``
 to 4, for example, commands like ``spack install`` will run ``make -j4``
-instead of hogging every core.
+instead of hogging every core. To build all software in serial,
+set ``build_jobs`` to 1.

-To build all software in serial, set ``build_jobs`` to 1.
+Note that specifying the number of jobs on the command line always takes
+priority, so that ``spack install -j<n>`` always runs ``make -j<n>``, even
+when that exceeds the number of cores available.

 --------------------
 ``ccache``
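The documented behavior boils down to a single rule; a small illustrative sketch (`make_j` is a hypothetical helper, not Spack API):

```python
def make_j(build_jobs, cores_available, cli_jobs=None):
    # An explicit -j always wins; otherwise use build_jobs, capped at the
    # number of cores available to the process.
    if cli_jobs is not None:
        return cli_jobs
    return min(build_jobs, cores_available)


# The examples from the documentation above:
assert make_j(build_jobs=16, cores_available=4) == 4             # make -j4
assert make_j(build_jobs=16, cores_available=32) == 16           # make -j16
assert make_j(build_jobs=2, cores_available=4, cli_jobs=6) == 6  # make -j6
```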
@@ -70,8 +70,7 @@
 from spack.util.executable import Executable
 from spack.util.module_cmd import load_module, path_from_modules, module
 from spack.util.log_parse import parse_log_events, make_log_context
-
-
+from spack.util.cpus import cpus_available
 #
 # This can be set by the user to globally disable parallel builds.
 #
@@ -452,6 +451,38 @@ def set_build_environment_variables(pkg, env, dirty):
     return env


+def determine_number_of_jobs(
+        parallel=False, command_line=None, config_default=None, max_cpus=None):
+    """
+    Packages that require sequential builds need 1 job. Otherwise we use the
+    number of jobs set on the command line. If not set, then we use the config
+    defaults (which is usually set through the builtin config scope), but we
+    cap to the number of CPUs available to avoid oversubscription.
+
+    Parameters:
+        parallel (bool): true when package supports parallel builds
+        command_line (int/None): command line override
+        config_default (int/None): config default number of jobs
+        max_cpus (int/None): maximum number of CPUs available. When None, this
+            value is automatically determined.
+    """
+    if not parallel:
+        return 1
+
+    if command_line is None and 'command_line' in spack.config.scopes():
+        command_line = spack.config.get('config:build_jobs', scope='command_line')
+
+    if command_line is not None:
+        return command_line
+
+    max_cpus = max_cpus or cpus_available()
+
+    # in some rare cases _builtin config may not be set, so default to max 16
+    config_default = config_default or spack.config.get('config:build_jobs', 16)
+
+    return min(max_cpus, config_default)
+
+
 def _set_variables_for_single_module(pkg, module):
     """Helper function to set module variables for single module."""
     # Put a marker on this module so that it won't execute the body of this
@@ -460,8 +491,7 @@ def _set_variables_for_single_module(pkg, module):
     if getattr(module, marker, False):
         return

-    jobs = spack.config.get('config:build_jobs', 16) if pkg.parallel else 1
-    jobs = min(jobs, multiprocessing.cpu_count())
+    jobs = determine_number_of_jobs(parallel=pkg.parallel)

     m = module
     m.make_jobs = jobs
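The resulting precedence mirrors the new unit tests further down; a few illustrative calls (values chosen arbitrarily, assuming a configured Spack session):

```python
from spack.build_environment import determine_number_of_jobs

# Sequential packages always get one job, whatever else is set.
assert determine_number_of_jobs(parallel=False, command_line=8, max_cpus=8) == 1

# An explicit command-line value wins, even beyond the available CPUs.
assert determine_number_of_jobs(parallel=True, command_line=10, max_cpus=4) == 10

# Otherwise: the config default, capped at the available CPUs.
assert determine_number_of_jobs(
    parallel=True, command_line=None, config_default=16, max_cpus=4) == 4
```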
@@ -5,7 +5,6 @@

 import argparse
-import multiprocessing

 import spack.cmd
 import spack.config
@@ -102,22 +101,10 @@ def __call__(self, parser, namespace, jobs, option_string):
                   '[expected a positive integer, got "{1}"]'
             raise ValueError(msg.format(option_string, jobs))

-        jobs = min(jobs, multiprocessing.cpu_count())
         spack.config.set('config:build_jobs', jobs, scope='command_line')

         setattr(namespace, 'jobs', jobs)

-    @property
-    def default(self):
-        # This default is coded as a property so that look-up
-        # of this value is done only on demand
-        return min(spack.config.get('config:build_jobs', 16),
-                   multiprocessing.cpu_count())
-
-    @default.setter
-    def default(self, value):
-        pass
-

 class DeptypeAction(argparse.Action):
     """Creates a tuple of valid dependency types from a deptype argument."""
@@ -35,7 +35,6 @@
 import os
 import re
 import sys
-import multiprocessing
 from contextlib import contextmanager
 from six import iteritems
 from ordereddict_backport import OrderedDict
@@ -61,6 +60,7 @@
 import spack.schema.upstreams
 import spack.schema.env
 from spack.error import SpackError
+from spack.util.cpus import cpus_available

 # Hacked yaml for configuration files preserves line numbers.
 import spack.util.spack_yaml as syaml
@@ -110,7 +110,7 @@
     'verify_ssl': True,
     'checksum': True,
     'dirty': False,
-    'build_jobs': min(16, multiprocessing.cpu_count()),
+    'build_jobs': min(16, cpus_available()),
     'build_stage': '$tempdir/spack-stage',
     'concretizer': 'original',
 }
@@ -13,9 +13,9 @@
 import spack.spec
 from spack.paths import build_env_path
 from spack.build_environment import dso_suffix, _static_to_shared_library
+from spack.build_environment import determine_number_of_jobs
 from spack.util.executable import Executable
 from spack.util.environment import EnvironmentModifications
-

 from llnl.util.filesystem import LibraryList, HeaderList
@@ -339,3 +339,22 @@ def test_setting_dtags_based_on_config(

     dtags_to_add = modifications['SPACK_DTAGS_TO_ADD'][0]
     assert dtags_to_add.value == expected_flag
+
+
+def test_build_jobs_sequential_is_sequential():
+    assert determine_number_of_jobs(
+        parallel=False, command_line=8, config_default=8, max_cpus=8) == 1
+
+
+def test_build_jobs_command_line_overrides():
+    assert determine_number_of_jobs(
+        parallel=True, command_line=10, config_default=1, max_cpus=1) == 10
+    assert determine_number_of_jobs(
+        parallel=True, command_line=10, config_default=100, max_cpus=100) == 10
+
+
+def test_build_jobs_defaults():
+    assert determine_number_of_jobs(
+        parallel=True, command_line=None, config_default=1, max_cpus=10) == 1
+    assert determine_number_of_jobs(
+        parallel=True, command_line=None, config_default=100, max_cpus=10) == 10
@@ -4,11 +4,9 @@
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)

 import argparse
-import multiprocessing

 import pytest

-
 import spack.cmd
 import spack.cmd.common.arguments as arguments
 import spack.config
@@ -27,20 +25,15 @@ def job_parser():
     yield p


-@pytest.mark.parametrize("ncores", [1, 2, 4, 8, 16, 32])
-def test_setting_jobs_flag(job_parser, ncores, monkeypatch):
-    monkeypatch.setattr(multiprocessing, 'cpu_count', lambda: ncores)
+def test_setting_jobs_flag(job_parser):
     namespace = job_parser.parse_args(['-j', '24'])
-    expected = min(24, ncores)
-    assert namespace.jobs == expected
-    assert spack.config.get('config:build_jobs') == expected
+    assert namespace.jobs == 24
+    assert spack.config.get('config:build_jobs', scope='command_line') == 24


-@pytest.mark.parametrize("ncores", [1, 2, 4, 8, 16, 32])
-def test_omitted_job_flag(job_parser, ncores, monkeypatch):
-    monkeypatch.setattr(multiprocessing, 'cpu_count', lambda: ncores)
+def test_omitted_job_flag(job_parser):
     namespace = job_parser.parse_args([])
-    assert namespace.jobs == min(ncores, 16)
+    assert namespace.jobs is None
+    assert spack.config.get('config:build_jobs') is None
lib/spack/spack/util/cpus.py (new file, +20)
@@ -0,0 +1,20 @@
+# Copyright 2013-2021 Lawrence Livermore National Security, LLC and other
+# Spack Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+import os
+import multiprocessing
+
+
+def cpus_available():
+    """
+    Returns the number of CPUs available for the current process, or the
+    number of physical CPUs when that information cannot be retrieved. The
+    number of available CPUs might differ from the number of physical CPUs
+    when using Spack through Slurm or container runtimes.
+    """
+    try:
+        return len(os.sched_getaffinity(0))  # novermin
+    except Exception:
+        return multiprocessing.cpu_count()
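A quick usage sketch of the new helper (output depends on the machine and any CPU restriction in effect):

```python
from spack.util.cpus import cpus_available

# On Linux this honors the process affinity mask, so under Slurm
# (e.g. `srun -c 4`) or `taskset -c 0-3` it returns 4. On platforms without
# os.sched_getaffinity (e.g. macOS), it falls back to the physical count.
print(cpus_available())
```

Catching a broad `Exception` keeps the helper safe on Python/OS combinations where the affinity call is missing or fails.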