Makefile generator for parallel spack install of environments (#30254)

`make` solves a lot of headaches that would otherwise have to be implemented in Spack:

1. Parallelism over packages through multiple `spack install` processes
2. Orderly output of parallel package installs thanks to `make --sync-output=recurse` or `make -Orecurse` (works well in GNU Make 4.3; macOS is unfortunately on a 16 years old 3.x version, but it's one `spack install gmake` away...)
3. Shared jobserver across packages, which means a single `-j` to rule them all, instead of manually finding a balance between `#spack install processes` & `#jobs per package` (See #30302).

This pr adds the `spack env depfile` command that generates a Makefile with dag hashes as
targets, and dag hashes of dependencies as prerequisites, and a command
along the lines of `spack install --only=packages /hash` to just install
a single package.

It exposes two convenient phony targets: `all`, `fetch-all`. The former installs the environment, the latter just fetches all sources. So one can either use `make all -j16` directly or run `make fetch-all -j16` on a login node and `make all -j16` on a compute node. 

Example:

```yaml
spack:
  specs: [perl]
  view: false
```

running

```
$ spack -e . env depfile --make-target-prefix env | tee Makefile
```
generates

```Makefile
SPACK ?= spack

.PHONY: env/all env/fetch-all env/clean

env/all: env/env

env/fetch-all: env/fetch

env/env: env/.install/cdqldivylyxocqymwnfzmzc5sx2zwvww
	@touch $@

env/fetch: env/.fetch/cdqldivylyxocqymwnfzmzc5sx2zwvww env/.fetch/gv5kin2xnn33uxyfte6k4a3bynhmtxze env/.fetch/cuymc7e5gupwyu7vza5d4vrbuslk277p env/.fetch/7vangk4jvsdgw6u6oe6ob63pyjl5cbgk env/.fetch/hyb7ehxxyqqp2hiw56bzm5ampkw6cxws env/.fetch/yfz2agazed7ohevqvnrmm7jfkmsgwjao env/.fetch/73t7ndb5w72hrat5hsax4caox2sgumzu env/.fetch/trvdyncxzfozxofpm3cwgq4vecpxixzs env/.fetch/sbzszb7v557ohyd6c2ekirx2t3ctxfxp env/.fetch/c4go4gxlcznh5p5nklpjm644epuh3pzc
	@touch $@

env/dirs:
	@mkdir -p env/.fetch env/.install

env/.fetch/%: | env/dirs
	$(info Fetching $(SPEC))
	$(SPACK) -e '/tmp/tmp.7PHPSIRACv' fetch $(SPACK_FETCH_FLAGS) /$(notdir $@) && touch $@

env/.install/%: env/.fetch/%
	$(info Installing $(SPEC))
	+$(SPACK) -e '/tmp/tmp.7PHPSIRACv' install $(SPACK_INSTALL_FLAGS) --only-concrete --only=package --no-add /$(notdir $@) && touch $@

# Set the human-readable spec for each target
env/%/cdqldivylyxocqymwnfzmzc5sx2zwvww: SPEC = perl@5.34.1%gcc@10.3.0+cpanm+shared+threads arch=linux-ubuntu20.04-zen2
env/%/gv5kin2xnn33uxyfte6k4a3bynhmtxze: SPEC = berkeley-db@18.1.40%gcc@10.3.0+cxx~docs+stl patches=b231fcc arch=linux-ubuntu20.04-zen2
env/%/cuymc7e5gupwyu7vza5d4vrbuslk277p: SPEC = bzip2@1.0.8%gcc@10.3.0~debug~pic+shared arch=linux-ubuntu20.04-zen2
env/%/7vangk4jvsdgw6u6oe6ob63pyjl5cbgk: SPEC = diffutils@3.8%gcc@10.3.0 arch=linux-ubuntu20.04-zen2
env/%/hyb7ehxxyqqp2hiw56bzm5ampkw6cxws: SPEC = libiconv@1.16%gcc@10.3.0 libs=shared,static arch=linux-ubuntu20.04-zen2
env/%/yfz2agazed7ohevqvnrmm7jfkmsgwjao: SPEC = gdbm@1.19%gcc@10.3.0 arch=linux-ubuntu20.04-zen2
env/%/73t7ndb5w72hrat5hsax4caox2sgumzu: SPEC = readline@8.1%gcc@10.3.0 arch=linux-ubuntu20.04-zen2
env/%/trvdyncxzfozxofpm3cwgq4vecpxixzs: SPEC = ncurses@6.2%gcc@10.3.0~symlinks+termlib abi=none arch=linux-ubuntu20.04-zen2
env/%/sbzszb7v557ohyd6c2ekirx2t3ctxfxp: SPEC = pkgconf@1.8.0%gcc@10.3.0 arch=linux-ubuntu20.04-zen2
env/%/c4go4gxlcznh5p5nklpjm644epuh3pzc: SPEC = zlib@1.2.12%gcc@10.3.0+optimize+pic+shared patches=0d38234 arch=linux-ubuntu20.04-zen2

# Install dependencies
env/.install/cdqldivylyxocqymwnfzmzc5sx2zwvww: env/.install/gv5kin2xnn33uxyfte6k4a3bynhmtxze env/.install/cuymc7e5gupwyu7vza5d4vrbuslk277p env/.install/yfz2agazed7ohevqvnrmm7jfkmsgwjao env/.install/c4go4gxlcznh5p5nklpjm644epuh3pzc
env/.install/cuymc7e5gupwyu7vza5d4vrbuslk277p: env/.install/7vangk4jvsdgw6u6oe6ob63pyjl5cbgk
env/.install/7vangk4jvsdgw6u6oe6ob63pyjl5cbgk: env/.install/hyb7ehxxyqqp2hiw56bzm5ampkw6cxws
env/.install/yfz2agazed7ohevqvnrmm7jfkmsgwjao: env/.install/73t7ndb5w72hrat5hsax4caox2sgumzu
env/.install/73t7ndb5w72hrat5hsax4caox2sgumzu: env/.install/trvdyncxzfozxofpm3cwgq4vecpxixzs
env/.install/trvdyncxzfozxofpm3cwgq4vecpxixzs: env/.install/sbzszb7v557ohyd6c2ekirx2t3ctxfxp

env/clean:
	rm -f -- env/env env/fetch env/.fetch/cdqldivylyxocqymwnfzmzc5sx2zwvww env/.fetch/gv5kin2xnn33uxyfte6k4a3bynhmtxze env/.fetch/cuymc7e5gupwyu7vza5d4vrbuslk277p env/.fetch/7vangk4jvsdgw6u6oe6ob63pyjl5cbgk env/.fetch/hyb7ehxxyqqp2hiw56bzm5ampkw6cxws env/.fetch/yfz2agazed7ohevqvnrmm7jfkmsgwjao env/.fetch/73t7ndb5w72hrat5hsax4caox2sgumzu env/.fetch/trvdyncxzfozxofpm3cwgq4vecpxixzs env/.fetch/sbzszb7v557ohyd6c2ekirx2t3ctxfxp env/.fetch/c4go4gxlcznh5p5nklpjm644epuh3pzc env/.install/cdqldivylyxocqymwnfzmzc5sx2zwvww env/.install/gv5kin2xnn33uxyfte6k4a3bynhmtxze env/.install/cuymc7e5gupwyu7vza5d4vrbuslk277p env/.install/7vangk4jvsdgw6u6oe6ob63pyjl5cbgk env/.install/hyb7ehxxyqqp2hiw56bzm5ampkw6cxws env/.install/yfz2agazed7ohevqvnrmm7jfkmsgwjao env/.install/73t7ndb5w72hrat5hsax4caox2sgumzu env/.install/trvdyncxzfozxofpm3cwgq4vecpxixzs env/.install/sbzszb7v557ohyd6c2ekirx2t3ctxfxp env/.install/c4go4gxlcznh5p5nklpjm644epuh3pzc
```

Then with `make -O` you get very nice orderly output when packages are built in parallel:
```console
$ make -Orecurse -j16
spack -e . install --only-concrete --only=package /c4go4gxlcznh5p5nklpjm644epuh3pzc && touch c4go4gxlcznh5p5nklpjm644epuh3pzc
==> Installing zlib-1.2.12-c4go4gxlcznh5p5nklpjm644epuh3pzc
...
  Fetch: 0.00s.  Build: 0.88s.  Total: 0.88s.
[+] /tmp/tmp.b1eTyAOe85/store/linux-ubuntu20.04-zen2/gcc-10.3.0/zlib-1.2.12-c4go4gxlcznh5p5nklpjm644epuh3pzc
spack -e . install --only-concrete --only=package /sbzszb7v557ohyd6c2ekirx2t3ctxfxp && touch sbzszb7v557ohyd6c2ekirx2t3ctxfxp
==> Installing pkgconf-1.8.0-sbzszb7v557ohyd6c2ekirx2t3ctxfxp
...
  Fetch: 0.00s.  Build: 3.96s.  Total: 3.96s.
[+] /tmp/tmp.b1eTyAOe85/store/linux-ubuntu20.04-zen2/gcc-10.3.0/pkgconf-1.8.0-sbzszb7v557ohyd6c2ekirx2t3ctxfxp
```

For Perl, at least for me, using `make -j16` versus `spack -e . install -j16` speeds up the builds from 3m32.623s to 2m22.775s, as some configure scripts run in parallel.

Another nice feature is you can do Makefile "metaprogramming" and depend on packages built by Spack. This example fetches all sources (in parallel) first, print a message, and only then build packages (in parallel).

```Makefile
SPACK ?= spack

.PHONY: env

all: env

spack.lock: spack.yaml
	$(SPACK) -e . concretize -f

env.mk: spack.lock
	$(SPACK) -e . env depfile -o $@ --make-target-prefix spack

fetch: spack/fetch
	@echo Fetched all packages && touch $@

env: fetch spack/env
	@echo This executes after the environment has been installed

clean:
	rm -rf spack/ env.mk spack.lock

ifeq (,$(filter clean,$(MAKECMDGOALS)))
include env.mk
endif
```
This commit is contained in:
Harmen Stoppels 2022-05-05 19:45:21 +02:00 committed by GitHub
parent 0dd9e5c86f
commit 2836648904
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 322 additions and 5 deletions

View file

@ -23,7 +23,10 @@
import sys
from glob import glob
from docutils.statemachine import StringList
from sphinx.domains.python import PythonDomain
from sphinx.ext.apidoc import main as sphinx_apidoc
from sphinx.parsers import RSTParser
# -- Spack customizations -----------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
@ -82,9 +85,6 @@
#
# Disable duplicate cross-reference warnings.
#
from sphinx.domains.python import PythonDomain
class PatchedPythonDomain(PythonDomain):
def resolve_xref(self, env, fromdocname, builder, typ, target, node, contnode):
if 'refspecific' in node:
@ -92,8 +92,20 @@ def resolve_xref(self, env, fromdocname, builder, typ, target, node, contnode):
return super(PatchedPythonDomain, self).resolve_xref(
env, fromdocname, builder, typ, target, node, contnode)
#
# Disable tabs to space expansion in code blocks
# since Makefiles require tabs.
#
class NoTabExpansionRSTParser(RSTParser):
def parse(self, inputstring, document):
if isinstance(inputstring, str):
lines = inputstring.splitlines()
inputstring = StringList(lines, document.current_source)
super().parse(inputstring, document)
def setup(sphinx):
sphinx.add_domain(PatchedPythonDomain, override=True)
sphinx.add_source_parser(NoTabExpansionRSTParser, override=True)
# -- General configuration -----------------------------------------------------

View file

@ -349,6 +349,24 @@ If the Environment has been concretized, Spack will install the
concretized specs. Otherwise, ``spack install`` will first concretize
the Environment and then install the concretized specs.
.. note::
Every ``spack install`` process builds one package at a time with multiple build
jobs, controlled by the ``-j`` flag and the ``config:build_jobs`` option
(see :ref:`build-jobs`). To speed up environment builds further, independent
packages can be installed in parallel by launching more Spack instances. For
example, the following will build at most four packages in parallel using
three background jobs:
.. code-block:: console
[myenv]$ spack install & spack install & spack install & spack install
Another option is to generate a ``Makefile`` and run ``make -j<N>`` to control
the number of parallel install processes. See :ref:`env-generate-depfile`
for details.
As it installs, ``spack install`` creates symbolic links in the
``logs/`` directory in the Environment, allowing for easy inspection
of build logs related to that environment. The ``spack install``
@ -910,3 +928,91 @@ environment.
The ``spack env deactivate`` command will remove the default view of
the environment from the user's path.
.. _env-generate-depfile:
------------------------------------------
Generating Depfiles from Environments
------------------------------------------
Spack can generate ``Makefile``\s to make it easier to build multiple
packages in an environment in parallel. Generated ``Makefile``\s expose
targets that can be included in existing ``Makefile``\s, to allow
other targets to depend on the environment installation.
A typical workflow is as follows:
.. code:: console
spack env create -d .
spack -e . add perl
spack -e . concretize
spack -e . env depfile > Makefile
make -j8
This creates an environment in the current working directory, and after
concretization, generates a ``Makefile``. Then ``make`` starts at most
8 concurrent jobs, meaning that multiple ``spack install`` processes may
start.
By default the following phony convenience targets are available:
- ``make all``: installs the environment (default target);
- ``make fetch-all``: only fetch sources of all packages;
- ``make clean``: cleans files used by make, but does not uninstall packages.
.. tip::
GNU Make version 4.3 and above have great support for output synchronization
through the ``-O`` and ``--output-sync`` flags, which ensure that output is
printed orderly per package install. To get synchronized output with colors,
use ``make -j<N> SPACK_COLOR=always --output-sync=recurse``.
The following advanced example shows how generated targets can be used in a
``Makefile``:
.. code:: Makefile
SPACK ?= spack
.PHONY: all clean fetch env
all: env
spack.lock: spack.yaml
$(SPACK) -e . concretize -f
env.mk: spack.lock
$(SPACK) -e . env depfile -o $@ --make-target-prefix spack
fetch: spack/fetch
$(info Environment fetched!)
env: spack/env
$(info Environment installed!)
clean:
rm -rf spack.lock env.mk spack/
ifeq (,$(filter clean,$(MAKECMDGOALS)))
include env.mk
endif
When ``make`` is invoked, it first "remakes" the missing include ``env.mk``
from its rule, which triggers concretization. When done, the generated targets
``spack/fetch`` and ``spack/env`` are available. In the above
example, the ``env`` target uses the latter as a prerequisite, meaning
that it can make use of the installed packages in its commands.
As it is typically undesirable to remake ``env.mk`` as part of ``make clean``,
the include is conditional.
.. note::
When including generated ``Makefile``\s, it is important to use
the ``--make-target-prefix`` flag and use the non-phony targets
``<target-prefix>/env`` and ``<target-prefix>/fetch`` as
prerequisites, instead of the phony targets ``<target-prefix>/all``
and ``<target-prefix>/fetch-all`` respectively.

View file

@ -8,6 +8,8 @@
import sys
import tempfile
import six
import llnl.util.filesystem as fs
import llnl.util.tty as tty
from llnl.util.tty.colify import colify
@ -41,7 +43,8 @@
'loads',
'view',
'update',
'revert'
'revert',
'depfile'
]
@ -523,6 +526,154 @@ def env_revert(args):
tty.msg(msg.format(manifest_file))
def env_depfile_setup_parser(subparser):
"""generate a depfile from the concrete environment specs"""
subparser.add_argument(
'--make-target-prefix', default=None, metavar='TARGET',
help='prefix Makefile targets with <TARGET>/<name>. By default the absolute '
'path to the directory makedeps under the environment metadata dir is '
'used. Can be set to an empty string --make-target-prefix \'\'.')
subparser.add_argument(
'--make-disable-jobserver', default=True, action='store_false',
dest='jobserver', help='disable POSIX jobserver support.')
subparser.add_argument(
'-o', '--output', default=None, metavar='FILE',
help='write the depfile to FILE rather than to stdout')
subparser.add_argument(
'-G', '--generator', default='make', choices=('make',),
help='specify the depfile type. Currently only make is supported.')
def env_depfile(args):
# Currently only make is supported.
spack.cmd.require_active_env(cmd_name='env depfile')
env = ev.active_environment()
# Maps each hash in the environment to a string of install prereqs
hash_to_prereqs = {}
hash_to_spec = {}
if args.make_target_prefix is None:
target_prefix = os.path.join(env.env_subdir_path, 'makedeps')
else:
target_prefix = args.make_target_prefix
def get_target(name):
# The `all`, `fetch` and `clean` targets are phony. It doesn't make sense to
# have /abs/path/to/env/metadir/{all,clean} targets. But it *does* make
# sense to have a prefix like `env/all`, `env/fetch`, `env/clean` when they are
# supposed to be included
if name in ('all', 'fetch-all', 'clean') and os.path.isabs(target_prefix):
return name
else:
return os.path.join(target_prefix, name)
def get_install_target(name):
return os.path.join(target_prefix, '.install', name)
def get_fetch_target(name):
return os.path.join(target_prefix, '.fetch', name)
for _, spec in env.concretized_specs():
for s in spec.traverse(root=True):
hash_to_spec[s.dag_hash()] = s
hash_to_prereqs[s.dag_hash()] = [
get_install_target(dep.dag_hash()) for dep in s.dependencies()]
root_dags = [s.dag_hash() for _, s in env.concretized_specs()]
# Root specs without deps are the prereqs for the environment target
root_install_targets = [get_install_target(h) for h in root_dags]
# All package install targets, not just roots.
all_install_targets = [get_install_target(h) for h in hash_to_spec.keys()]
# Fetch targets for all packages in the environment, not just roots.
all_fetch_targets = [get_fetch_target(h) for h in hash_to_spec.keys()]
buf = six.StringIO()
buf.write("""SPACK ?= spack
.PHONY: {} {} {}
{}: {}
{}: {}
{}: {}
\t@touch $@
{}: {}
\t@touch $@
{}:
\t@mkdir -p {} {}
{}: | {}
\t$(info Fetching $(SPEC))
\t$(SPACK) -e '{}' fetch $(SPACK_FETCH_FLAGS) /$(notdir $@) && touch $@
{}: {}
\t$(info Installing $(SPEC))
\t{}$(SPACK) -e '{}' install $(SPACK_INSTALL_FLAGS) --only-concrete --only=package \
--no-add /$(notdir $@) && touch $@
""".format(get_target('all'), get_target('fetch-all'), get_target('clean'),
get_target('all'), get_target('env'),
get_target('fetch-all'), get_target('fetch'),
get_target('env'), ' '.join(root_install_targets),
get_target('fetch'), ' '.join(all_fetch_targets),
get_target('dirs'), get_target('.fetch'), get_target('.install'),
get_target('.fetch/%'), get_target('dirs'),
env.path,
get_target('.install/%'), get_target('.fetch/%'),
'+' if args.jobserver else '', env.path))
# Targets are of the form <prefix>/<name>: [<prefix>/<depname>]...,
# The prefix can be an empty string, in that case we don't add the `/`.
# The name is currently the dag hash of the spec. In principle it
# could be the package name in case of `concretization: together` so
# it can be more easily referred to, but for now we don't special case
# this.
fmt = '{name}{@version}{%compiler}{variants}{arch=architecture}'
# Set SPEC for each hash
buf.write('# Set the human-readable spec for each target\n')
for dag_hash in hash_to_prereqs.keys():
formatted_spec = hash_to_spec[dag_hash].format(fmt)
buf.write("{}: SPEC = {}\n".format(get_target('%/' + dag_hash), formatted_spec))
buf.write('\n')
# Set install dependencies
buf.write('# Install dependencies\n')
for parent, children in hash_to_prereqs.items():
if not children:
continue
buf.write('{}: {}\n'.format(get_install_target(parent), ' '.join(children)))
buf.write('\n')
# Clean target: remove target files but not their folders, cause
# --make-target-prefix can be any existing directory we do not control,
# including empty string (which means deleting the containing folder
# would delete the folder with the Makefile)
buf.write("{}:\n\trm -f -- {} {} {} {}\n".format(
get_target('clean'),
get_target('env'),
get_target('fetch'),
' '.join(all_fetch_targets),
' '.join(all_install_targets)))
makefile = buf.getvalue()
# Finally write to stdout/file.
if args.output:
with open(args.output, 'w') as f:
f.write(makefile)
else:
sys.stdout.write(makefile)
#: Dictionary mapping subcommand names and aliases to functions
subcommand_functions = {}

View file

@ -25,6 +25,7 @@
from spack.main import SpackCommand, SpackCommandError
from spack.spec import Spec
from spack.stage import stage_prefix
from spack.util.executable import Executable
from spack.util.mock_package import MockPackageMultiRepo
from spack.util.path import substitute_path_variables
@ -2856,3 +2857,46 @@ def test_environment_query_spec_by_hash(mock_stage, mock_fetch, install_mockery)
with ev.read('test') as e:
assert not e.matching_spec('libdwarf').installed
assert e.matching_spec('libelf').installed
def test_environment_depfile_makefile(tmpdir, mock_packages):
env('create', 'test')
make = Executable('make')
makefile = str(tmpdir.join('Makefile'))
with ev.read('test'):
add('libdwarf')
concretize()
# Disable jobserver so we can do a dry run.
with ev.read('test'):
env('depfile', '-o', makefile, '--make-disable-jobserver',
'--make-target-prefix', 'prefix')
# Do make dry run.
all_out = make('-n', '-f', makefile, output=str)
# Check whether `make` installs everything
with ev.read('test') as e:
for _, root in e.concretized_specs():
for spec in root.traverse(root=True):
for task in ('.fetch', '.install'):
tgt = os.path.join('prefix', task, spec.dag_hash())
assert 'touch {}'.format(tgt) in all_out
# Check whether make prefix/fetch-all only fetches
fetch_out = make('prefix/fetch-all', '-n', '-f', makefile, output=str)
assert '.install/' not in fetch_out
assert '.fetch/' in fetch_out
def test_environment_depfile_out(tmpdir, mock_packages):
env('create', 'test')
makefile_path = str(tmpdir.join('Makefile'))
with ev.read('test'):
add('libdwarf')
concretize()
with ev.read('test'):
env('depfile', '-G', 'make', '-o', makefile_path)
stdout = env('depfile', '-G', 'make')
with open(makefile_path, 'r') as f:
assert stdout == f.read()

View file

@ -912,7 +912,7 @@ _spack_env() {
then
SPACK_COMPREPLY="-h --help"
else
SPACK_COMPREPLY="activate deactivate create remove rm list ls status st loads view update revert"
SPACK_COMPREPLY="activate deactivate create remove rm list ls status st loads view update revert depfile"
fi
}
@ -1003,6 +1003,10 @@ _spack_env_revert() {
fi
}
_spack_env_depfile() {
SPACK_COMPREPLY="-h --help --make-target-prefix --make-disable-jobserver -o --output -G --generator"
}
_spack_extensions() {
if $list_options
then