repo: refactor indexes to be more extensible

- the virtual provider cache and the tag index were previously
  generated by nearly identical but separate methods.

- factor out an Indexer interface for updating repository caches, and
  provide implementations for each type of index (TagIndex,
  ProviderIndex) so that more can be added if needed.

- Among other things, this allows all indexes to be updated at once.
  This matters because loading package files is the real overhead;
  building the indexes once the packages are loaded is trivial.
  Generating every index in a single pass avoids repeated bulk
  read-ins.

- This can be extended to dependents (reverse dependencies) and
  patches later; a sketch of what such an index might look like
  follows the commit metadata below.
Todd Gamblin 2018-12-21 09:10:09 -08:00
parent 527ff860f0
commit c1d7adaaac


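To make the extensibility claim concrete, here is a minimal sketch of
what a future index could look like under the new interface. The
PatchIndexer and PatchIndex names are hypothetical, not part of this
commit; the pattern simply mirrors the TagIndexer introduced in the
diff below.

    # Hypothetical sketch: a third index registered through the same
    # Indexer interface. PatchIndex and its from_json/to_json/
    # update_package methods are assumed for illustration only.
    class PatchIndexer(Indexer):
        """Lifecycle methods for a hypothetical index of patched packages."""
        def _create(self):
            return PatchIndex()

        def read(self, stream):
            self.index = PatchIndex.from_json(stream)

        def update(self, pkg_fullname):
            self.index.update_package(pkg_fullname)

        def write(self, stream):
            self.index.to_json(stream)

Registering it would then be a single call alongside the existing
indexers, e.g. self._repo_index.add_indexer('patches', PatchIndexer()).
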
@@ -3,6 +3,7 @@
 #
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 
+import abc
 import collections
 import os
 import stat
@ -14,7 +15,7 @@
import traceback
import json
from contextlib import contextmanager
from six import string_types
from six import string_types, add_metaclass
try:
from collections.abc import Mapping
@@ -230,111 +231,153 @@ def update_package(self, pkg_name):
         self._tag_dict[tag].append(package.name)
 
 
-@llnl.util.lang.memoized
-def make_provider_index_cache(packages_path, namespace):
-    """Lazily updates the provider index cache associated with a repository,
-    if need be, then returns it. Caches results for later look-ups.
-
-    Args:
-        packages_path: path of the repository
-        namespace: namespace of the repository
-
-    Returns:
-        instance of ProviderIndex
-    """
-    # Map that goes from package names to stat info
-    fast_package_checker = FastPackageChecker(packages_path)
-
-    # Filename of the provider index cache
-    cache_filename = 'providers/{0}-index.yaml'.format(namespace)
-
-    # Compute which packages needs to be updated in the cache
-    misc_cache = spack.caches.misc_cache
-    index_mtime = misc_cache.mtime(cache_filename)
-
-    needs_update = [
-        x for x, sinfo in fast_package_checker.items()
-        if sinfo.st_mtime > index_mtime
-    ]
-
-    # Read the old ProviderIndex, or make a new one.
-    index_existed = misc_cache.init_entry(cache_filename)
-
-    if index_existed and not needs_update:
-        # If the provider index exists and doesn't need an update
-        # just read from it
-        with misc_cache.read_transaction(cache_filename) as f:
-            index = ProviderIndex.from_yaml(f)
-
-    else:
-        # Otherwise we need a write transaction to update it
-        with misc_cache.write_transaction(cache_filename) as (old, new):
-            index = ProviderIndex.from_yaml(old) if old else ProviderIndex()
-
-            for pkg_name in needs_update:
-                namespaced_name = '{0}.{1}'.format(namespace, pkg_name)
-                index.remove_provider(namespaced_name)
-                index.update(namespaced_name)
-
-            index.to_yaml(new)
-
-    return index
-
-
-@llnl.util.lang.memoized
-def make_tag_index_cache(packages_path, namespace):
-    """Lazily updates the tag index cache associated with a repository,
-    if need be, then returns it. Caches results for later look-ups.
-
-    Args:
-        packages_path: path of the repository
-        namespace: namespace of the repository
-
-    Returns:
-        instance of TagIndex
-    """
-    # Map that goes from package names to stat info
-    fast_package_checker = FastPackageChecker(packages_path)
-
-    # Filename of the provider index cache
-    cache_filename = 'tags/{0}-index.json'.format(namespace)
-
-    # Compute which packages needs to be updated in the cache
-    misc_cache = spack.caches.misc_cache
-    index_mtime = misc_cache.mtime(cache_filename)
-
-    needs_update = [
-        x for x, sinfo in fast_package_checker.items()
-        if sinfo.st_mtime > index_mtime
-    ]
-
-    # Read the old ProviderIndex, or make a new one.
-    index_existed = misc_cache.init_entry(cache_filename)
-
-    if index_existed and not needs_update:
-        # If the provider index exists and doesn't need an update
-        # just read from it
-        with misc_cache.read_transaction(cache_filename) as f:
-            index = TagIndex.from_json(f)
-
-    else:
-        # Otherwise we need a write transaction to update it
-        with misc_cache.write_transaction(cache_filename) as (old, new):
-            index = TagIndex.from_json(old) if old else TagIndex()
-
-            for pkg_name in needs_update:
-                namespaced_name = '{0}.{1}'.format(namespace, pkg_name)
-                index.update_package(namespaced_name)
-
-            index.to_json(new)
-
-    return index
+@add_metaclass(abc.ABCMeta)
+class Indexer(object):
+    """Adaptor for indexes that need to be generated when repos are updated."""
+
+    def create(self):
+        self.index = self._create()
+
+    @abc.abstractmethod
+    def _create(self):
+        """Create an empty index and return it."""
+
+    @abc.abstractmethod
+    def read(self, stream):
+        """Read this index from a provided file object."""
+
+    @abc.abstractmethod
+    def update(self, pkg_fullname):
+        """Update the index in memory with information about a package."""
+
+    @abc.abstractmethod
+    def write(self, stream):
+        """Write the index to a file object."""
+
+
+class TagIndexer(Indexer):
+    """Lifecycle methods for a TagIndex on a Repo."""
+    def _create(self):
+        return TagIndex()
+
+    def read(self, stream):
+        self.index = TagIndex.from_json(stream)
+
+    def update(self, pkg_fullname):
+        self.index.update_package(pkg_fullname)
+
+    def write(self, stream):
+        self.index.to_json(stream)
+
+
+class ProviderIndexer(Indexer):
+    """Lifecycle methods for virtual package providers."""
+    def _create(self):
+        return ProviderIndex()
+
+    def read(self, stream):
+        self.index = ProviderIndex.from_yaml(stream)
+
+    def update(self, pkg_fullname):
+        self.index.remove_provider(pkg_fullname)
+        self.index.update(pkg_fullname)
+
+    def write(self, stream):
+        self.index.to_yaml(stream)
+
+
+class RepoIndex(object):
+    """Container class that manages a set of Indexers for a Repo.
+
+    This class is responsible for checking packages in a repository for
+    updates (using ``FastPackageChecker``) and for regenerating indexes
+    when they're needed.
+
+    ``Indexers`` should be added to the ``RepoIndex`` using
+    ``add_indexer(name, indexer)``, and they should support the interface
+    defined by ``Indexer``, so that the ``RepoIndex`` can read, generate,
+    and update stored indices.
+
+    Generated indexes are accessed by name via ``__getitem__()``.
+
+    """
+    def __init__(self, package_checker, namespace):
+        self.checker = package_checker
+        self.packages_path = self.checker.packages_path
+        self.namespace = namespace
+
+        self.indexers = {}
+        self.indexes = {}
+
+    def add_indexer(self, name, indexer):
+        """Add an indexer to the repo index.
+
+        Arguments:
+            name (str): name of this indexer
+
+            indexer (object): an object that supports create(), read(),
+                update(), and write() operations
+
+        """
+        self.indexers[name] = indexer
+
+    def __getitem__(self, name):
+        """Get the index with the specified name, reindexing if needed."""
+        indexer = self.indexers.get(name)
+        if not indexer:
+            raise KeyError('no such index: %s' % name)
+
+        if name not in self.indexes:
+            self._build_all_indexes()
+
+        return self.indexes[name]
+
+    def _build_all_indexes(self):
+        """Build all the indexes at once.
+
+        We regenerate *all* indexes whenever *any* index needs an update,
+        because the main bottleneck here is loading all the packages. It
+        can take tens of seconds to regenerate sequentially, and we'd
+        rather pay that cost once than on several invocations.
+
+        """
+        for name, indexer in self.indexers.items():
+            self.indexes[name] = self._build_index(name, indexer)
+
+    def _build_index(self, name, indexer):
+        """Determine which packages need an update, and update indexes."""
+        # Filename of the index cache (we assume they're all json)
+        cache_filename = '{0}/{1}-index.json'.format(name, self.namespace)
+
+        # Compute which packages need to be updated in the cache
+        misc_cache = spack.caches.misc_cache
+        index_mtime = misc_cache.mtime(cache_filename)
+
+        needs_update = [
+            x for x, sinfo in self.checker.items()
+            if sinfo.st_mtime > index_mtime
+        ]
+
+        index_existed = misc_cache.init_entry(cache_filename)
+        if index_existed and not needs_update:
+            # If the index exists and doesn't need an update, read it
+            with misc_cache.read_transaction(cache_filename) as f:
+                indexer.read(f)
+
+        else:
+            # Otherwise update it and rewrite the cache file
+            with misc_cache.write_transaction(cache_filename) as (old, new):
+                indexer.read(old) if old else indexer.create()
+
+                for pkg_name in needs_update:
+                    namespaced_name = '%s.%s' % (self.namespace, pkg_name)
+                    indexer.update(namespaced_name)
+
+                indexer.write(new)
+
+        return indexer.index
 
 
 class RepoPath(object):
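
The cache-invalidation logic in _build_index above boils down to an
mtime comparison: a package's index entries are stale when its
package.py is newer than the cached index file. Below is a minimal
standalone sketch of that check; stale_packages is a hypothetical
helper standing in for FastPackageChecker plus misc_cache.mtime(), not
code from this commit.

    import os

    def stale_packages(packages_path, index_path):
        """Return package names whose package.py is newer than the index.

        Assumes one subdirectory per package containing a package.py,
        as in a Spack package repository.
        """
        index_mtime = (os.stat(index_path).st_mtime
                       if os.path.exists(index_path) else 0)
        stale = []
        for name in os.listdir(packages_path):
            pkg_file = os.path.join(packages_path, name, 'package.py')
            if (os.path.exists(pkg_file)
                    and os.stat(pkg_file).st_mtime > index_mtime):
                stale.append(name)
        return stale

If the index file does not exist yet, every package counts as stale,
which matches the write-transaction path in the diff above.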
@@ -658,11 +701,8 @@ def check(condition, msg):
         # Maps that goes from package name to corresponding file stat
         self._fast_package_checker = None
 
-        # Index of virtual dependencies, computed lazily
-        self._provider_index = None
-
-        # Index of tags, computed lazily
-        self._tag_index = None
+        # Indexes for this repository, computed lazily
+        self._repo_index = None
 
         # make sure the namespace for packages in this repo exists.
         self._create_namespace()
@@ -847,27 +887,24 @@ def purge(self):
         """Clear entire package instance cache."""
         self._instances.clear()
 
+    @property
+    def index(self):
+        """Construct the index for this repo lazily."""
+        if self._repo_index is None:
+            self._repo_index = RepoIndex(self._pkg_checker, self.namespace)
+            self._repo_index.add_indexer('providers', ProviderIndexer())
+            self._repo_index.add_indexer('tags', TagIndexer())
+        return self._repo_index
+
     @property
     def provider_index(self):
         """A provider index with names *specific* to this repo."""
-        if self._provider_index is None:
-            self._provider_index = make_provider_index_cache(
-                self.packages_path, self.namespace
-            )
-        return self._provider_index
+        return self.index['providers']
 
     @property
     def tag_index(self):
-        """A provider index with names *specific* to this repo."""
-        if self._tag_index is None:
-            self._tag_index = make_tag_index_cache(
-                self.packages_path, self.namespace
-            )
-        return self._tag_index
+        """Index of tags and which packages they're defined on."""
+        return self.index['tags']
 
     @_autospec
     def providers_for(self, vpkg_spec):
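
After this change, callers of the two properties are unaffected; they
just route through one RepoIndex. A rough sketch of the new access
path, assuming repo is an existing spack.repo.Repo instance:

    # Both properties now delegate to the shared RepoIndex.
    providers = repo.provider_index   # same as repo.index['providers']
    tags = repo.tag_index             # same as repo.index['tags']
    # The first of these two accesses builds *both* indexes, because
    # __getitem__ calls _build_all_indexes(); the second access is
    # then just a dictionary lookup.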