Add a ProviderIndex cache.

- Spack will check if the index needs updating, and will only parse
  all package files if it does.

- Spack tries to parse as few package files as necessary.
This commit is contained in:
Todd Gamblin 2016-05-28 20:27:22 -07:00
parent cf2f902b82
commit faa0a0e4c3
3 changed files with 143 additions and 13 deletions

2
.gitignore vendored
View file

@ -1,5 +1,7 @@
/var/spack/stage
/var/spack/cache
/var/spack/repos/*/index.yaml
/var/spack/repos/*/lock
*.pyc
/opt
*~

View file

@ -23,6 +23,9 @@
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
##############################################################################
import os
import stat
import shutil
import errno
import exceptions
import sys
import inspect
@ -33,6 +36,7 @@
import yaml
import llnl.util.tty as tty
from llnl.util.lock import Lock
from llnl.util.filesystem import *
import spack.error
@ -394,13 +398,25 @@ def check(condition, msg):
if not condition: raise BadRepoError(msg)
# Validate repository layout.
self.config_file = join_path(self.root, repo_config_name)
self.config_file = join_path(self.root, repo_config_name)
check(os.path.isfile(self.config_file),
"No %s found in '%s'" % (repo_config_name, root))
self.packages_path = join_path(self.root, packages_dir_name)
check(os.path.isdir(self.packages_path),
"No directory '%s' found in '%s'" % (repo_config_name, root))
self.index_file = join_path(self.root, repo_index_name)
check(not os.path.exists(self.index_file) or
(os.path.isfile(self.index_file) and os.access(self.index_file, os.R_OK|os.W_OK)),
"Cannot access repository index file in %s" % root)
# lock file for reading/writing the index
self._lock_path = join_path(self.root, 'lock')
if not os.path.exists(self._lock_path):
touch(self._lock_path)
self._lock = Lock(self._lock_path)
# Read configuration and validate namespace
config = self._read_config()
check('namespace' in config, '%s must define a namespace.'
@ -424,7 +440,14 @@ def check(condition, msg):
self._modules = {}
self._classes = {}
self._instances = {}
# list of packages that are newer than the index.
self._needs_update = []
# Index of virtual dependencies
self._provider_index = None
# Cached list of package names.
self._all_package_names = None
# make sure the namespace for packages in this repo exists.
@ -611,13 +634,56 @@ def purge(self):
self._instances.clear()
def _update_provider_index(self):
    """Load this repo's cached ProviderIndex, refreshing it if needed.

    Runs ``_fast_package_check()`` first so that ``self._needs_update``
    lists the packages whose ``package.py`` is newer than the index
    file.  Then:

    * If the index file exists and nothing needs updating, the index
      is simply read from ``self.index_file`` under a read lock.
    * Otherwise, under a write lock, the existing index is read (or an
      empty ``ProviderIndex`` is created), every package named in
      ``self._needs_update`` is removed and re-added, and the result
      is written to a temporary file that is then renamed over
      ``self.index_file``.

    The result is stored in ``self._provider_index``.  Any exception
    raised while reading or writing propagates to the caller after the
    lock has been released and the temporary file cleaned up.
    """
    # Check modification dates of all packages
    self._fast_package_check()

    def read():
        with open(self.index_file) as f:
            self._provider_index = ProviderIndex.from_yaml(f)

    # Fast path: the index exists and no package is newer than it.
    if os.path.isfile(self.index_file) and not self._needs_update:
        self._lock.acquire_read()
        try:
            read()
        finally:
            self._lock.release_read()
        return

    # Defined before the try block so cleanup can always refer to it.
    tmp = self.index_file + '.tmp'

    self._lock.acquire_write()
    try:
        # Decide whether to start from the old index only once we hold
        # the write lock, so the check and the read see the same file.
        if os.path.isfile(self.index_file):
            read()
        else:
            self._provider_index = ProviderIndex()

        # Replace the entries of every package newer than the index.
        for pkg_name in self._needs_update:
            namespaced_name = '%s.%s' % (self.namespace, pkg_name)
            self._provider_index.remove_provider(namespaced_name)
            self._provider_index.update(namespaced_name)

        try:
            # Write to a temporary file and rename it into place so a
            # failed write never leaves a truncated index behind.
            with open(tmp, 'w') as f:
                self._provider_index.to_yaml(f)
            os.rename(tmp, self.index_file)
        finally:
            # After a successful rename the temporary file is gone; it
            # only still exists if something above failed.  It is a
            # regular file, so it must be removed with os.remove().
            if os.path.exists(tmp):
                try:
                    os.remove(tmp)
                except OSError:
                    # Best-effort cleanup; do not mask the real error.
                    pass
    finally:
        self._lock.release_write()
@property
def provider_index(self):
    """A provider index with names *specific* to this repo.

    The index is created lazily on first access and cached in
    ``self._provider_index``; later accesses return the cached object.
    """
    if self._provider_index is None:
        # _update_provider_index() assigns self._provider_index itself
        # (read from the index file or rebuilt), so constructing a
        # ProviderIndex from every package name here first would only
        # do work that is immediately thrown away.
        self._update_provider_index()
    return self._provider_index
@ -663,21 +729,33 @@ def filename_for_package_name(self, spec):
return join_path(pkg_dir, package_file_name)
def all_package_names(self):
"""Returns a sorted list of all package names in the Repo."""
def _fast_package_check(self):
"""List packages in the repo and cehck whether index is up to date.
Both of these operations require checking all `package.py`
files so we do them at the same time. We list the repo
directory and look at package.py files, and we compare the
index modification date with the most recently modified package
file, storing the result.
The implementation here should try to minimize filesystem
calls. At the moment, it is O(number of packages) and makes
about one stat call per package. This is reasonably fast, and
avoids actually importing packages in Spack, which is slow.
"""
if self._all_package_names is None:
self._all_package_names = []
# Get index modification time.
index_mtime = 0
if os.path.exists(self.index_file):
sinfo = os.stat(self.index_file)
index_mtime = sinfo.st_mtime
for pkg_name in os.listdir(self.packages_path):
# Skip non-directories in the package root.
pkg_dir = join_path(self.packages_path, pkg_name)
if not os.path.isdir(pkg_dir):
continue
# Skip directories without a package.py in them.
pkg_file = join_path(self.packages_path, pkg_name, package_file_name)
if not os.path.isfile(pkg_file):
continue
# Warn about invalid names that look like packages.
if not valid_module_name(pkg_name):
@ -685,14 +763,50 @@ def all_package_names(self):
% (pkg_dir, pkg_name))
continue
# construct the file name from the directory
pkg_file = join_path(
self.packages_path, pkg_name, package_file_name)
# Use stat here to avoid lots of calls to the filesystem.
try:
sinfo = os.stat(pkg_file)
except OSError as e:
if e.errno == errno.ENOENT:
# No package.py file here.
continue
elif e.errno == errno.EACCES:
tty.warn("Can't read package file %s." % pkg_file)
continue
raise e
# if it's not a file, skip it.
if stat.S_ISDIR(sinfo.st_mode):
continue
# All checks passed. Add it to the list.
self._all_package_names.append(pkg_name)
# record the package if it is newer than the index.
if sinfo.st_mtime > index_mtime:
self._needs_update.append(pkg_name)
self._all_package_names.sort()
return self._all_package_names
def all_package_names(self):
    """Return the sorted list of every package name in this Repo.

    The list is cached on the instance; ``_fast_package_check()`` is
    called first to make sure the cache has been populated.
    """
    # Populate (or reuse) the cached name list before handing it out.
    self._fast_package_check()
    names = self._all_package_names
    return names
def all_packages(self):
    """Generate every package in the repository, one at a time.

    Each name from ``all_package_names()`` is passed to ``self.get``.
    Use this with care, because loading packages is slow.
    """
    for pkg_name in self.all_package_names():
        package = self.get(pkg_name)
        yield package

View file

@ -227,6 +227,20 @@ def merge(self, other):
spdict[provided_spec] += opdict[provided_spec]
def remove_provider(self, pkg_name):
    """Remove a provider from the ProviderIndex.

    Every provider whose ``fullname`` equals ``pkg_name`` is dropped
    from ``self.providers``.  Entries that become empty as a result
    (a provided spec with no providers left, or a top-level key with
    no provided specs left) are deleted as well.  The containers are
    modified in place.
    """
    # Iterate over snapshots: deleting from a dict or removing from a
    # set while iterating over it raises RuntimeError (or silently
    # skips elements for a list).
    for pkg, pkg_dict in list(self.providers.items()):
        for provided, pset in list(pkg_dict.items()):
            stale = [p for p in pset if p.fullname == pkg_name]
            for provider in stale:
                pset.remove(provider)

            if not pset:
                del pkg_dict[provided]

        if not pkg_dict:
            del self.providers[pkg]
def copy(self):
"""Deep copy of this ProviderIndex."""
clone = ProviderIndex()