Pipelines: Temporary buildcache storage (#21474)

Before this change, in pipeline environments where runners do not have access
to persistent shared file-system storage, the only way to pass buildcaches to
dependents in later stages was the "enable-artifacts-buildcache" flag in the
gitlab-ci section of spack.yaml.  This change adds a second mechanism,
"temporary-storage-url-prefix", which can be provided instead of
"enable-artifacts-buildcache"; the two cannot be used at the same time.  If
this prefix is provided (only "file://" and "s3://" urls are supported), the
gitlab "CI_PIPELINE_ID" is appended to it to form the url of a mirror where
pipeline jobs write buildcache entries for use by jobs in subsequent stages.
In that case, a cleanup job is also generated to run after all the rebuild
jobs have finished; it deletes the contents of the temporary mirror.  To
support this behavior, a new mirror sub-command has been added:
"spack mirror destroy", which accepts either a mirror name or a url.
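
As a sketch of the configuration (the mirror urls, package name, and runner
tags are illustrative values borrowed from the test added in this change), a
spack.yaml using the new option might look like:

    spack:
      specs:
      - archive-files
      mirrors:
        some-mirror: https://my.fake.mirror
      gitlab-ci:
        temporary-storage-url-prefix: file:///work/temp/mirror
        mappings:
        - match:
          - archive-files
          runner-attributes:
            tags:
            - donotcare

Pipeline generation then appends a cleanup job whose script runs
"spack mirror destroy --mirror-url file:///work/temp/mirror/$CI_PIPELINE_ID".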

This change also fixes a bug in the generation of the "needs" list for each
job.  Each job's "needs" list is supposed to contain only its direct
dependencies for scheduling purposes, unless "enable-artifacts-buildcache" is
specified; only in that case should the needs lists contain all transitive
dependencies.  This change fixes a bug that caused the needs lists to always
contain all transitive dependencies, regardless of whether or not
"enable-artifacts-buildcache" was specified.
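
As an illustration of the intended result (the job and dependency names below
are hypothetical; only the "job"/"artifacts" keys mirror what the updated test
checks), a generated job should now list only its direct dependencies:

    some-pkg-build:
      needs:
      # direct dependencies only; when "enable-artifacts-buildcache" is
      # specified, all transitive dependencies appear here instead
      - job: direct-dep/abc1234
        artifacts: false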
Scott Wittenburg 2021-02-16 18:21:18 -07:00 committed by GitHub
parent 1fe51ffe18
commit 5b0507cc65
10 changed files with 432 additions and 84 deletions

View file

@@ -763,6 +763,10 @@ def generate_package_index(cache_prefix):
url_util.join(cache_prefix, 'index.json.hash'),
keep_original=False,
extra_args={'ContentType': 'text/plain'})
except Exception as err:
msg = 'Encountered problem pushing package index to {0}: {1}'.format(
cache_prefix, err)
tty.warn(msg)
finally:
shutil.rmtree(tmpdir)
@@ -823,6 +827,10 @@ def generate_key_index(key_prefix, tmpdir=None):
url_util.join(key_prefix, 'index.json'),
keep_original=False,
extra_args={'ContentType': 'application/json'})
except Exception as err:
msg = 'Encountered problem pushing key index to {0}: {1}'.format(
key_prefix, err)
tty.warn(msg)
finally:
if remove_tmpdir:
shutil.rmtree(tmpdir)

View file

@@ -42,6 +42,7 @@
]
SPACK_PR_MIRRORS_ROOT_URL = 's3://spack-pr-mirrors'
TEMP_STORAGE_MIRROR_NAME = 'ci_temporary_mirror'
spack_gpg = spack.main.SpackCommand('gpg')
spack_compiler = spack.main.SpackCommand('compiler')
@@ -396,8 +397,6 @@ def append_dep(s, d):
# root_spec = get_spec_string(spec)
root_spec = spec
rkey = spec_deps_key(spec)
for s in spec.traverse(deptype=all):
if s.external:
tty.msg('Will not stage external pkg: {0}'.format(s))
@@ -413,8 +412,6 @@ def append_dep(s, d):
'needs_rebuild': not up_to_date_mirrors,
}
append_dep(rkey, skey)
for d in s.dependencies(deptype=all):
dkey = spec_deps_key(d)
if d.external:
@@ -586,6 +583,10 @@ def generate_gitlab_ci_yaml(env, print_summary, output_file, prune_dag=False,
if 'rebuild-index' in gitlab_ci and gitlab_ci['rebuild-index'] is False:
rebuild_index_enabled = False
temp_storage_url_prefix = None
if 'temporary-storage-url-prefix' in gitlab_ci:
temp_storage_url_prefix = gitlab_ci['temporary-storage-url-prefix']
bootstrap_specs = []
phases = []
if 'bootstrap' in gitlab_ci:
@@ -928,9 +929,30 @@ def generate_gitlab_ci_yaml(env, print_summary, output_file, prune_dag=False,
]
if job_id > 0:
if rebuild_index_enabled and not is_pr_pipeline:
if temp_storage_url_prefix:
# There were some rebuild jobs scheduled, so we will need to
# schedule a job to clean up the temporary storage location
# associated with this pipeline.
stage_names.append('cleanup-temp-storage')
cleanup_job = {}
if service_job_config:
copy_attributes(default_attrs,
service_job_config,
cleanup_job)
cleanup_job['stage'] = 'cleanup-temp-storage'
cleanup_job['script'] = [
'spack mirror destroy --mirror-url {0}/$CI_PIPELINE_ID'.format(
temp_storage_url_prefix)
]
cleanup_job['when'] = 'always'
output_object['cleanup'] = cleanup_job
if rebuild_index_enabled:
# Add a final job to regenerate the index
final_stage = 'stage-rebuild-index'
stage_names.append('stage-rebuild-index')
final_job = {}
if service_job_config:
@@ -938,15 +960,18 @@ def generate_gitlab_ci_yaml(env, print_summary, output_file, prune_dag=False,
service_job_config,
final_job)
final_script = 'spack buildcache update-index --keys'
final_script = '{0} -d {1}'.format(final_script, mirror_urls[0])
index_target_mirror = mirror_urls[0]
if is_pr_pipeline:
index_target_mirror = pr_mirror_url
final_job['stage'] = final_stage
final_job['script'] = [final_script]
final_job['stage'] = 'stage-rebuild-index'
final_job['script'] = [
'spack buildcache update-index --keys -d {0}'.format(
index_target_mirror)
]
final_job['when'] = 'always'
output_object['rebuild-index'] = final_job
stage_names.append(final_stage)
output_object['stages'] = stage_names

View file

@@ -17,6 +17,7 @@
import spack.environment as ev
import spack.hash_types as ht
import spack.util.executable as exe
import spack.util.url as url_util
description = "manage continuous integration pipelines"
@@ -159,6 +160,7 @@ def ci_rebuild(args):
# SPACK_SIGNING_KEY
ci_artifact_dir = get_env_var('CI_PROJECT_DIR')
ci_pipeline_id = get_env_var('CI_PIPELINE_ID')
signing_key = get_env_var('SPACK_SIGNING_KEY')
root_spec = get_env_var('SPACK_ROOT_SPEC')
job_spec_pkg_name = get_env_var('SPACK_JOB_SPEC_PKG_NAME')
@@ -219,19 +221,28 @@ def ci_rebuild(args):
spack_is_pr_pipeline = True if pr_env_var == 'True' else False
pipeline_mirror_url = None
temp_storage_url_prefix = None
if 'temporary-storage-url-prefix' in gitlab_ci:
temp_storage_url_prefix = gitlab_ci['temporary-storage-url-prefix']
pipeline_mirror_url = url_util.join(
temp_storage_url_prefix, ci_pipeline_id)
enable_artifacts_mirror = False
artifact_mirror_url = None
if 'enable-artifacts-buildcache' in gitlab_ci:
enable_artifacts_mirror = gitlab_ci['enable-artifacts-buildcache']
if enable_artifacts_mirror or spack_is_pr_pipeline:
# If this is a PR pipeline, we will override the setting to
# make sure that artifacts buildcache is enabled. Otherwise
# jobs will not have binary deps available since we do not
# allow pushing binaries to remote mirror during PR pipelines
if (enable_artifacts_mirror or (spack_is_pr_pipeline and
not enable_artifacts_mirror and not temp_storage_url_prefix)):
# If you explicitly enabled the artifacts buildcache feature, or
# if this is a PR pipeline but you did not enable either of the
# per-pipeline temporary storage features, we force the use of
# artifacts buildcache. Otherwise jobs will not have binary
# dependencies from previous stages available since we do not
# allow pushing binaries to the remote mirror during PR pipelines.
enable_artifacts_mirror = True
artifact_mirror_url = 'file://' + local_mirror_dir
pipeline_mirror_url = 'file://' + local_mirror_dir
mirror_msg = 'artifact buildcache enabled, mirror url: {0}'.format(
artifact_mirror_url)
pipeline_mirror_url)
tty.debug(mirror_msg)
# Clean out scratch directory from last stage
@@ -325,8 +336,8 @@ def add_mirror(mirror_name, mirror_url):
if pr_mirror_url:
add_mirror('ci_pr_mirror', pr_mirror_url)
if enable_artifacts_mirror:
add_mirror('ci_artifact_mirror', artifact_mirror_url)
if pipeline_mirror_url:
add_mirror(spack_ci.TEMP_STORAGE_MIRROR_NAME, pipeline_mirror_url)
tty.debug('listing spack mirrors:')
spack_cmd('mirror', 'list')
@@ -423,17 +434,18 @@ def add_mirror(mirror_name, mirror_url):
tty.msg('Permission problem writing to mirror')
tty.msg(err_msg)
# Create another copy of that buildcache on "local artifact
# mirror" (only done if artifacts buildcache is enabled)
# Create another copy of that buildcache in the per-pipeline
# temporary storage mirror (this is only done if either artifacts
# buildcache is enabled or a temporary storage url prefix is set)
spack_ci.push_mirror_contents(env, job_spec, job_spec_yaml_path,
artifact_mirror_url, cdash_build_id,
pipeline_mirror_url, cdash_build_id,
sign_binaries)
# Relate this build to its dependencies on CDash (if enabled)
if enable_cdash:
spack_ci.relate_cdash_builds(
spec_map, cdash_base_url, cdash_build_id, cdash_project,
artifact_mirror_url or pr_mirror_url or remote_mirror_url)
pipeline_mirror_url or pr_mirror_url or remote_mirror_url)
def ci_reindex(args):

View file

@@ -67,6 +67,19 @@ def setup_parser(subparser):
" retrieve all versions of each package")
arguments.add_common_arguments(create_parser, ['specs'])
# Destroy
destroy_parser = sp.add_parser('destroy', help=mirror_destroy.__doc__)
destroy_target = destroy_parser.add_mutually_exclusive_group(required=True)
destroy_target.add_argument('-m', '--mirror-name',
metavar='mirror_name',
type=str,
help="find mirror to destroy by name")
destroy_target.add_argument('--mirror-url',
metavar='mirror_url',
type=str,
help="find mirror to destroy by url")
# used to construct scope arguments below
scopes = spack.config.scopes()
scopes_metavar = spack.config.scopes_metavar
@@ -360,8 +373,22 @@ def mirror_create(args):
sys.exit(1)
def mirror_destroy(args):
"""Given a url, recursively delete everything under it."""
mirror_url = None
if args.mirror_name:
result = spack.mirror.MirrorCollection().lookup(args.mirror_name)
mirror_url = result.push_url
elif args.mirror_url:
mirror_url = args.mirror_url
web_util.remove_url(mirror_url, recursive=True)
def mirror(parser, args):
action = {'create': mirror_create,
'destroy': mirror_destroy,
'add': mirror_add,
'remove': mirror_remove,
'rm': mirror_remove,

View file

@@ -65,66 +65,90 @@
'properties': runner_attributes_schema_items,
}
#: Properties for inclusion in other schemas
properties = {
'gitlab-ci': {
'type': 'object',
'additionalProperties': False,
'required': ['mappings'],
'patternProperties': union_dicts(
runner_attributes_schema_items,
{
'bootstrap': {
'type': 'array',
'items': {
'anyOf': [
{
'type': 'string',
}, {
'type': 'object',
'additionalProperties': False,
'required': ['name'],
'properties': {
'name': {
'type': 'string',
},
'compiler-agnostic': {
'type': 'boolean',
'default': False,
},
},
},
],
},
},
'mappings': {
'type': 'array',
'items': {
core_shared_properties = union_dicts(
runner_attributes_schema_items,
{
'bootstrap': {
'type': 'array',
'items': {
'anyOf': [
{
'type': 'string',
}, {
'type': 'object',
'additionalProperties': False,
'required': ['match'],
'required': ['name'],
'properties': {
'match': {
'type': 'array',
'items': {
'type': 'string',
},
'name': {
'type': 'string',
},
'compiler-agnostic': {
'type': 'boolean',
'default': False,
},
'runner-attributes': runner_selector_schema,
},
},
],
},
},
'mappings': {
'type': 'array',
'items': {
'type': 'object',
'additionalProperties': False,
'required': ['match'],
'properties': {
'match': {
'type': 'array',
'items': {
'type': 'string',
},
},
'runner-attributes': runner_selector_schema,
},
'enable-artifacts-buildcache': {
'type': 'boolean',
'default': False,
},
'service-job-attributes': runner_selector_schema,
'rebuild-index': {'type': 'boolean'},
}
),
},
},
'service-job-attributes': runner_selector_schema,
'rebuild-index': {'type': 'boolean'},
},
)
gitlab_ci_properties = {
'anyOf': [
{
'type': 'object',
'additionalProperties': False,
'required': ['mappings'],
'properties': union_dicts(
core_shared_properties,
{
'enable-artifacts-buildcache': {
'type': 'boolean',
},
},
),
},
{
'type': 'object',
'additionalProperties': False,
'required': ['mappings'],
'properties': union_dicts(
core_shared_properties,
{
'temporary-storage-url-prefix': {
'type': 'string',
},
},
),
},
]
}
#: Properties for inclusion in other schemas
properties = {
'gitlab-ci': gitlab_ci_properties,
}
#: Full schema with metadata
schema = {

View file

@@ -7,7 +7,7 @@
import json
import os
import pytest
from jsonschema import validate
from jsonschema import validate, ValidationError
import spack
import spack.ci as ci
@@ -20,6 +20,7 @@
import spack.repo as repo
from spack.schema.buildcache_spec import schema as spec_yaml_schema
from spack.schema.database_index import schema as db_idx_schema
from spack.schema.gitlab_ci import schema as gitlab_ci_schema
from spack.spec import Spec, CompilerSpec
from spack.util.mock_package import MockPackageMultiRepo
import spack.util.executable as exe
@@ -185,10 +186,18 @@ def _validate_needs_graph(yaml_contents, needs_graph, artifacts):
for needs_def_name, needs_list in needs_graph.items():
if job_name.startswith(needs_def_name):
# check job needs against the expected needs definition
j_needs = job_def['needs']
print('job {0} needs:'.format(needs_def_name))
print([j['job'] for j in j_needs])
print('expected:')
print([nl for nl in needs_list])
assert all([job_needs['job'][:job_needs['job'].index('/')]
in needs_list for job_needs in job_def['needs']])
in needs_list for job_needs in j_needs])
assert(all([nl in
[n['job'][:n['job'].index('/')] for n in j_needs]
for nl in needs_list]))
assert all([job_needs['artifacts'] == artifacts
for job_needs in job_def['needs']])
for job_needs in j_needs])
break
@@ -378,7 +387,10 @@ def test_ci_generate_with_cdash_token(tmpdir, mutable_mock_env_path,
fake_token = 'notreallyatokenbutshouldnotmatter'
os.environ['SPACK_CDASH_AUTH_TOKEN'] = fake_token
copy_to_file = str(tmpdir.join('backup-ci.yml'))
output = ci_cmd('generate', '--copy-to', copy_to_file, output=str)
try:
output = ci_cmd('generate', '--copy-to', copy_to_file, output=str)
finally:
del os.environ['SPACK_CDASH_AUTH_TOKEN']
# That fake token should still have resulted in being unable to
# register build group with cdash, but the workload should
# still have been generated.
@@ -582,7 +594,11 @@ def test_ci_generate_for_pr_pipeline(tmpdir, mutable_mock_env_path,
os.environ['SPACK_PR_BRANCH'] = 'fake-test-branch'
monkeypatch.setattr(
ci, 'SPACK_PR_MIRRORS_ROOT_URL', r"file:///fake/mirror")
ci_cmd('generate', '--output-file', outputfile)
try:
ci_cmd('generate', '--output-file', outputfile)
finally:
del os.environ['SPACK_IS_PR_PIPELINE']
del os.environ['SPACK_PR_BRANCH']
with open(outputfile) as f:
contents = f.read()
@@ -1271,3 +1287,97 @@ def test_ci_subcommands_without_mirror(tmpdir, mutable_mock_env_path,
output = ci_cmd('rebuild-index', output=str, fail_on_error=False)
ex = 'spack ci rebuild-index requires an env containing a mirror'
assert(ex in output)
def test_ensure_only_one_temporary_storage():
"""Make sure 'gitlab-ci' section of env does not allow specification of
both 'enable-artifacts-buildcache' and 'temporary-storage-url-prefix'."""
gitlab_ci_template = """
gitlab-ci:
{0}
mappings:
- match:
- notcheckedhere
runner-attributes:
tags:
- donotcare
"""
enable_artifacts = 'enable-artifacts-buildcache: True'
temp_storage = 'temporary-storage-url-prefix: file:///temp/mirror'
specify_both = """{0}
{1}
""".format(enable_artifacts, temp_storage)
specify_neither = ''
# User can specify "enable-artifacts-buildcache" (boolean)
yaml_obj = syaml.load(gitlab_ci_template.format(enable_artifacts))
validate(yaml_obj, gitlab_ci_schema)
# User can also specify "temporary-storage-url-prefix" (string)
yaml_obj = syaml.load(gitlab_ci_template.format(temp_storage))
validate(yaml_obj, gitlab_ci_schema)
# However, specifying both should fail to validate
yaml_obj = syaml.load(gitlab_ci_template.format(specify_both))
with pytest.raises(ValidationError):
validate(yaml_obj, gitlab_ci_schema)
# Specifying neither should be fine too, as neither of these properties
# should be required
yaml_obj = syaml.load(gitlab_ci_template.format(specify_neither))
validate(yaml_obj, gitlab_ci_schema)
def test_ci_generate_temp_storage_url(tmpdir, mutable_mock_env_path,
env_deactivate, install_mockery,
mock_packages, monkeypatch):
"""Verify correct behavior when using temporary-storage-url-prefix"""
filename = str(tmpdir.join('spack.yaml'))
with open(filename, 'w') as f:
f.write("""\
spack:
specs:
- archive-files
mirrors:
some-mirror: https://my.fake.mirror
gitlab-ci:
temporary-storage-url-prefix: file:///work/temp/mirror
mappings:
- match:
- archive-files
runner-attributes:
tags:
- donotcare
image: donotcare
""")
with tmpdir.as_cwd():
env_cmd('create', 'test', './spack.yaml')
outputfile = str(tmpdir.join('.gitlab-ci.yml'))
monkeypatch.setattr(
ci, 'SPACK_PR_MIRRORS_ROOT_URL', r"file:///fake/mirror")
with ev.read('test'):
ci_cmd('generate', '--output-file', outputfile)
with open(outputfile) as of:
pipeline_doc = syaml.load(of.read())
print(pipeline_doc)
assert('cleanup' in pipeline_doc)
cleanup_job = pipeline_doc['cleanup']
assert('script' in cleanup_job)
cleanup_task = cleanup_job['script'][0]
assert(cleanup_task.startswith('spack mirror destroy'))
assert('stages' in pipeline_doc)
stages = pipeline_doc['stages']
# Cleanup job should be 2nd to last, just before rebuild-index
assert('stage' in cleanup_job)
assert(cleanup_job['stage'] == stages[-2])

View file

@@ -14,6 +14,9 @@
env = SpackCommand('env')
add = SpackCommand('add')
concretize = SpackCommand('concretize')
install = SpackCommand('install')
buildcache = SpackCommand('buildcache')
uninstall = SpackCommand('uninstall')
@pytest.fixture
@@ -183,3 +186,39 @@ def test_mirror_name_collision(tmp_scope):
with pytest.raises(SpackCommandError):
mirror('add', '--scope', tmp_scope, 'first', '1')
def test_mirror_destroy(install_mockery_mutable_config,
mock_packages, mock_fetch, mock_archive,
mutable_config, monkeypatch, tmpdir):
# Create a temp mirror directory for buildcache usage
mirror_dir = tmpdir.join('mirror_dir')
mirror_url = 'file://{0}'.format(mirror_dir.strpath)
mirror('add', 'atest', mirror_url)
spec_name = 'libdwarf'
# Put a binary package in a buildcache
install('--no-cache', spec_name)
buildcache('create', '-u', '-a', '-f', '-d', mirror_dir.strpath, spec_name)
contents = os.listdir(mirror_dir.strpath)
assert('build_cache' in contents)
# Destroy mirror by name
mirror('destroy', '-m', 'atest')
assert(not os.path.exists(mirror_dir.strpath))
buildcache('create', '-u', '-a', '-f', '-d', mirror_dir.strpath, spec_name)
contents = os.listdir(mirror_dir.strpath)
assert('build_cache' in contents)
# Destroy mirror by url
mirror('destroy', '--mirror-url', mirror_url)
assert(not os.path.exists(mirror_dir.strpath))
uninstall('-y', spec_name)
mirror('remove', 'atest')

View file

@@ -6,10 +6,14 @@
import ordereddict_backport
import pytest
import spack.config
import spack.paths
import spack.util.web
import spack.util.s3
from spack.version import ver
import llnl.util.tty as tty
def _create_url(relative_url):
web_data_path = os.path.join(spack.paths.test_path, 'data', 'web')
@@ -195,3 +199,58 @@ def test_list_url(tmpdir):
'file-0.txt',
'file-1.txt',
'file-2.txt']
class MockPages(object):
def search(self, *args, **kwargs):
return [
{'Key': 'keyone'},
{'Key': 'keytwo'},
{'Key': 'keythree'},
]
class MockPaginator(object):
def paginate(self, *args, **kwargs):
return MockPages()
class MockS3Client(object):
def get_paginator(self, *args, **kwargs):
return MockPaginator()
def delete_objects(self, *args, **kwargs):
return {
'Errors': [
{'Key': 'keyone', 'Message': 'Access Denied'}
],
'Deleted': [
{'Key': 'keytwo'},
{'Key': 'keythree'}
],
}
def delete_object(self, *args, **kwargs):
pass
def test_remove_s3_url(monkeypatch, capfd):
fake_s3_url = 's3://my-bucket/subdirectory/mirror'
def mock_create_s3_session(url):
return MockS3Client()
monkeypatch.setattr(
spack.util.s3, 'create_s3_session', mock_create_s3_session)
current_debug_level = tty.debug_level()
tty.set_debug(1)
spack.util.web.remove_url(fake_s3_url, recursive=True)
err = capfd.readouterr()[1]
tty.set_debug(current_debug_level)
assert('Failed to delete keyone (Access Denied)' in err)
assert('Deleted keythree' in err)
assert('Deleted keytwo' in err)

View file

@@ -229,17 +229,57 @@ def url_exists(url):
return False
def remove_url(url):
def _debug_print_delete_results(result):
if 'Deleted' in result:
for d in result['Deleted']:
tty.debug('Deleted {0}'.format(d['Key']))
if 'Errors' in result:
for e in result['Errors']:
tty.debug('Failed to delete {0} ({1})'.format(
e['Key'], e['Message']))
def remove_url(url, recursive=False):
url = url_util.parse(url)
local_path = url_util.local_file_path(url)
if local_path:
os.remove(local_path)
if recursive:
shutil.rmtree(local_path)
else:
os.remove(local_path)
return
if url.scheme == 's3':
s3 = s3_util.create_s3_session(url)
s3.delete_object(Bucket=url.netloc, Key=url.path)
bucket = url.netloc
if recursive:
# Because list_objects_v2 can only return up to 1000 items
# at a time, we have to paginate to make sure we get it all
prefix = url.path.strip('/')
paginator = s3.get_paginator('list_objects_v2')
pages = paginator.paginate(Bucket=bucket, Prefix=prefix)
delete_request = {'Objects': []}
for item in pages.search('Contents'):
if not item:
continue
delete_request['Objects'].append({'Key': item['Key']})
# Make sure we do not try to hit S3 with a list of more
# than 1000 items
if len(delete_request['Objects']) >= 1000:
r = s3.delete_objects(Bucket=bucket, Delete=delete_request)
_debug_print_delete_results(r)
delete_request = {'Objects': []}
# Delete any items that remain
if len(delete_request['Objects']):
r = s3.delete_objects(Bucket=bucket, Delete=delete_request)
_debug_print_delete_results(r)
else:
s3.delete_object(Bucket=bucket, Key=url.path)
return
# Don't even try for other URL schemes.

View file

@@ -1123,7 +1123,7 @@ _spack_mirror() {
then
SPACK_COMPREPLY="-h --help -n --no-checksum --deprecated"
else
SPACK_COMPREPLY="create add remove rm set-url list"
SPACK_COMPREPLY="create destroy add remove rm set-url list"
fi
}
@@ -1136,6 +1136,10 @@ _spack_mirror_create() {
fi
}
_spack_mirror_destroy() {
SPACK_COMPREPLY="-h --help -m --mirror-name --mirror-url"
}
_spack_mirror_add() {
if $list_options
then