Record installation date and time in DB (#7334)

* Added installation date and time to the database

Information on the date and time of installation of a spec is recorded
into the database. The information is retained on reindexing.

* Expose the possibility to query for installation date

The DB can now be queried for specs that have been installed in a given
time window. This query is exposed on the command line via two new
options of the `find` command: `--start-date` and `--end-date`.

* Extended docstring for Database._add

* Use timestamps since the epoch instead of formatted date in the DB

* Allow 'pretty date' formats from command line

* Substituted kwargs with explicit arguments

* Simplified regex for pretty date strings. Added unit tests.
This commit is contained in:
Massimiliano Culpo 2018-03-22 19:07:27 +01:00 committed by Todd Gamblin
parent 6699ba8769
commit 5655895865
6 changed files with 255 additions and 44 deletions

View file

@ -29,7 +29,7 @@
import functools import functools
import collections import collections
import inspect import inspect
from datetime import datetime from datetime import datetime, timedelta
from six import string_types from six import string_types
# Ignore emacs backups when listing modules # Ignore emacs backups when listing modules
@ -442,6 +442,65 @@ def pretty_date(time, now=None):
return str(diff) + " years ago" return str(diff) + " years ago"
def pretty_string_to_date(date_str, now=None):
    """Parses a string representing a date and returns a datetime object.

    Args:
        date_str (str): string representing a date. This string might be
            an absolute date (``YYYY``, ``YYYY-MM`` or ``YYYY-MM-DD``)
            or a *pretty date* relative to ``now`` (like ``yesterday``,
            ``a day ago``, ``an hour ago`` or ``2 months ago``)
        now (datetime, optional): reference instant used to resolve
            relative dates. Defaults to ``datetime.now()``.

    Returns:
        (datetime): datetime object corresponding to ``date_str``

    Raises:
        ValueError: if ``date_str`` does not match any supported format,
            or matches an absolute format but is not a valid date
    """
    if now is None:
        now = datetime.now()

    def _strptime(fmt):
        # Build a parser that ignores the match object and delegates
        # to datetime.strptime with a fixed format
        return lambda text, _match: datetime.strptime(text, fmt)

    def _n_xxx_ago(_text, match):
        how_many, time_period = match.groups()
        how_many = 1 if how_many in ('a', 'an') else int(how_many)

        # timedelta natively supports time periods up to 'weeks'.
        # To apply month or year we convert to 30 and 365 days
        if time_period == 'month':
            how_many *= 30
            time_period = 'day'
        elif time_period == 'year':
            how_many *= 365
            time_period = 'day'

        return now - timedelta(**{time_period + 's': how_many})

    # Every pattern is anchored at both ends, so that trailing text
    # (e.g. '3 days agonizing') is rejected instead of silently ignored.
    # A list keeps the order in which formats are tried deterministic.
    parsers = [
        (re.compile(r'^\d{4}$'), _strptime('%Y')),
        (re.compile(r'^\d{4}-\d{2}$'), _strptime('%Y-%m')),
        (re.compile(r'^\d{4}-\d{2}-\d{2}$'), _strptime('%Y-%m-%d')),
        (re.compile(
            r'^(an?|\d+)\s*'
            r'(year|month|week|day|hour|minute|second)s?\s*ago$'
        ), _n_xxx_ago),
        (re.compile(r'^yesterday$'),
         lambda _text, _match: now - timedelta(days=1)),
    ]

    for regexp, parser in parsers:
        match = regexp.match(date_str)
        if match:
            return parser(date_str, match)

    msg = 'date "{0}" does not match any valid format'.format(date_str)
    raise ValueError(msg)
class RequiredAttributeError(ValueError): class RequiredAttributeError(ValueError):
def __init__(self, message): def __init__(self, message):

View file

@ -25,7 +25,9 @@
import sys import sys
import llnl.util.tty as tty import llnl.util.tty as tty
import llnl.util.lang
import spack import spack
import spack.database
import spack.cmd.common.arguments as arguments import spack.cmd.common.arguments as arguments
from spack.cmd import display_specs from spack.cmd import display_specs
@ -96,6 +98,14 @@ def setup_parser(subparser):
action='store_true', action='store_true',
help='show fully qualified package names') help='show fully qualified package names')
subparser.add_argument(
'--start-date',
help='earliest date of installation [YYYY-MM-DD]'
)
subparser.add_argument(
'--end-date', help='latest date of installation [YYYY-MM-DD]'
)
arguments.add_common_arguments(subparser, ['constraint']) arguments.add_common_arguments(subparser, ['constraint'])
@ -114,6 +124,13 @@ def query_arguments(args):
if args.implicit: if args.implicit:
explicit = False explicit = False
q_args = {'installed': installed, 'known': known, "explicit": explicit} q_args = {'installed': installed, 'known': known, "explicit": explicit}
# Time window of installation
for attribute in ('start_date', 'end_date'):
date = getattr(args, attribute)
if date:
q_args[attribute] = llnl.util.lang.pretty_string_to_date(date)
return q_args return q_args

View file

@ -39,6 +39,8 @@
filesystem. filesystem.
""" """
import datetime
import time
import os import os
import sys import sys
import socket import socket
@ -76,6 +78,11 @@
_tracked_deps = ('link', 'run') _tracked_deps = ('link', 'run')
def _now():
"""Returns the time since the epoch"""
return time.time()
def _autospec(function): def _autospec(function):
"""Decorator that automatically converts the argument of a single-arg """Decorator that automatically converts the argument of a single-arg
function to a Spec.""" function to a Spec."""
@ -103,14 +110,31 @@ class InstallRecord(object):
actually remove from the database until a spec has no installed actually remove from the database until a spec has no installed
dependents left. dependents left.
Args:
spec (Spec): spec tracked by the install record
path (str): path where the spec has been installed
installed (bool): whether or not the spec is currently installed
ref_count (int): number of specs that depend on this one
explicit (bool, optional): whether or not this spec was explicitly
installed, or pulled-in as a dependency of something else
installation_time (time, optional): time of the installation
""" """
def __init__(self, spec, path, installed, ref_count=0, explicit=False): def __init__(
self,
spec,
path,
installed,
ref_count=0,
explicit=False,
installation_time=None
):
self.spec = spec self.spec = spec
self.path = str(path) self.path = str(path)
self.installed = bool(installed) self.installed = bool(installed)
self.ref_count = ref_count self.ref_count = ref_count
self.explicit = explicit self.explicit = explicit
self.installation_time = installation_time or _now()
def to_dict(self): def to_dict(self):
return { return {
@ -118,14 +142,15 @@ def to_dict(self):
'path': self.path, 'path': self.path,
'installed': self.installed, 'installed': self.installed,
'ref_count': self.ref_count, 'ref_count': self.ref_count,
'explicit': self.explicit 'explicit': self.explicit,
'installation_time': self.installation_time
} }
@classmethod @classmethod
def from_dict(cls, spec, dictionary): def from_dict(cls, spec, dictionary):
d = dictionary d = dict(dictionary.items())
return InstallRecord(spec, d['path'], d['installed'], d['ref_count'], d.pop('spec', None)
d.get('explicit', False)) return InstallRecord(spec, **d)
class Database(object): class Database(object):
@ -347,7 +372,7 @@ def check(cond, msg):
def invalid_record(hash_key, error): def invalid_record(hash_key, error):
msg = ("Invalid record in Spack database: " msg = ("Invalid record in Spack database: "
"hash: %s, cause: %s: %s") "hash: %s, cause: %s: %s")
msg %= (hash_key, type(e).__name__, str(e)) msg %= (hash_key, type(error).__name__, str(error))
raise CorruptDatabaseError(msg, self._index_path) raise CorruptDatabaseError(msg, self._index_path)
# Build up the database in three passes: # Build up the database in three passes:
@ -442,12 +467,18 @@ def _read_suppress_error():
tty.debug( tty.debug(
'RECONSTRUCTING FROM SPEC.YAML: {0}'.format(spec)) 'RECONSTRUCTING FROM SPEC.YAML: {0}'.format(spec))
explicit = True explicit = True
inst_time = os.stat(spec.prefix).st_ctime
if old_data is not None: if old_data is not None:
old_info = old_data.get(spec.dag_hash()) old_info = old_data.get(spec.dag_hash())
if old_info is not None: if old_info is not None:
explicit = old_info.explicit explicit = old_info.explicit
inst_time = old_info.installation_time
self._add(spec, directory_layout, explicit=explicit) extra_args = {
'explicit': explicit,
'installation_time': inst_time
}
self._add(spec, directory_layout, **extra_args)
processed_specs.add(spec) processed_specs.add(spec)
@ -479,7 +510,8 @@ def _read_suppress_error():
kwargs = { kwargs = {
'spec': entry.spec, 'spec': entry.spec,
'directory_layout': layout, 'directory_layout': layout,
'explicit': entry.explicit 'explicit': entry.explicit,
'installation_time': entry.installation_time # noqa: E501
} }
self._add(**kwargs) self._add(**kwargs)
processed_specs.add(entry.spec) processed_specs.add(entry.spec)
@ -579,23 +611,52 @@ def _read(self):
self._write(None, None, None) self._write(None, None, None)
self.reindex(spack.store.layout) self.reindex(spack.store.layout)
def _add(self, spec, directory_layout=None, explicit=False): def _add(
self,
spec,
directory_layout=None,
explicit=False,
installation_time=None
):
"""Add an install record for this spec to the database. """Add an install record for this spec to the database.
Assumes spec is installed in ``layout.path_for_spec(spec)``. Assumes spec is installed in ``layout.path_for_spec(spec)``.
Also ensures dependencies are present and updated in the DB as Also ensures dependencies are present and updated in the DB as
either intsalled or missing. either installed or missing.
Args:
spec: spec to be added
directory_layout: layout of the spec installation
**kwargs:
explicit
Possible values: True, False, any
A spec that was installed following a specific user
request is marked as explicit. If instead it was
pulled-in as a dependency of a user requested spec
it's considered implicit.
installation_time
Date and time of installation
""" """
if not spec.concrete: if not spec.concrete:
raise NonConcreteSpecAddError( raise NonConcreteSpecAddError(
"Specs added to DB must be concrete.") "Specs added to DB must be concrete.")
# Retrieve optional arguments
installation_time = installation_time or _now()
for dep in spec.dependencies(_tracked_deps): for dep in spec.dependencies(_tracked_deps):
dkey = dep.dag_hash() dkey = dep.dag_hash()
if dkey not in self._data: if dkey not in self._data:
self._add(dep, directory_layout, explicit=False) extra_args = {
'explicit': False,
'installation_time': installation_time
}
self._add(dep, directory_layout, **extra_args)
key = spec.dag_hash() key = spec.dag_hash()
if key not in self._data: if key not in self._data:
@ -613,8 +674,13 @@ def _add(self, spec, directory_layout=None, explicit=False):
# Create a new install record with no deps initially. # Create a new install record with no deps initially.
new_spec = spec.copy(deps=False) new_spec = spec.copy(deps=False)
extra_args = {
'explicit': explicit,
'installation_time': installation_time
}
self._data[key] = InstallRecord( self._data[key] = InstallRecord(
new_spec, path, installed, ref_count=0, explicit=explicit) new_spec, path, installed, ref_count=0, **extra_args
)
# Connect dependencies from the DB to the new copy. # Connect dependencies from the DB to the new copy.
for name, dep in iteritems(spec.dependencies_dict(_tracked_deps)): for name, dep in iteritems(spec.dependencies_dict(_tracked_deps)):
@ -766,45 +832,57 @@ def activated_extensions_for(self, extendee_spec, extensions_layout=None):
continue continue
# TODO: conditional way to do this instead of catching exceptions # TODO: conditional way to do this instead of catching exceptions
def query(self, query_spec=any, known=any, installed=True, explicit=any): def query(
"""Run a query on the database. self,
query_spec=any,
known=any,
installed=True,
explicit=any,
start_date=None,
end_date=None
):
"""Run a query on the database
``query_spec`` Args:
Queries iterate through specs in the database and return query_spec: queries iterate through specs in the database and
those that satisfy the supplied ``query_spec``. If return those that satisfy the supplied ``query_spec``. If
query_spec is `any`, This will match all specs in the query_spec is `any`, This will match all specs in the
database. If it is a spec, we'll evaluate database. If it is a spec, we'll evaluate
``spec.satisfies(query_spec)``. ``spec.satisfies(query_spec)``
The query can be constrained by two additional attributes: known (bool or any, optional): Specs that are "known" are those
for which Spack can locate a ``package.py`` file -- i.e.,
Spack "knows" how to install them. Specs that are unknown may
represent packages that existed in a previous version of
Spack, but have since either changed their name or
been removed
``known`` installed (bool or any, optional): Specs for which a prefix exists
Possible values: True, False, any are "installed". A spec that is NOT installed will be in the
database if some other spec depends on it but its installation
has gone away since Spack installed it.
Specs that are "known" are those for which Spack can explicit (bool or any, optional): A spec that was installed
locate a ``package.py`` file -- i.e., Spack "knows" how to following a specific user request is marked as explicit. If
install them. Specs that are unknown may represent instead it was pulled-in as a dependency of a user requested
packages that existed in a previous version of Spack, but spec it's considered implicit.
have since either changed their name or been removed.
``installed`` start_date (datetime, optional): filters the query discarding
Possible values: True, False, any specs that have been installed before ``start_date``.
Specs for which a prefix exists are "installed". A spec end_date (datetime, optional): filters the query discarding
that is NOT installed will be in the database if some specs that have been installed after ``end_date``.
other spec depends on it but its installation has gone
away since Spack installed it.
TODO: Specs are a lot like queries. Should there be a
wildcard spec object, and should specs have attributes
like installed and known that can be queried? Or are
these really special cases that only belong here?
Returns:
list of specs that match the query
""" """
# TODO: Specs are a lot like queries. Should there be a
# TODO: wildcard spec object, and should specs have attributes
# TODO: like installed and known that can be queried? Or are
# TODO: these really special cases that only belong here?
with self.read_transaction(): with self.read_transaction():
# Just look up concrete specs with hashes; no fancy search. # Just look up concrete specs with hashes; no fancy search.
if (isinstance(query_spec, spack.spec.Spec) and if isinstance(query_spec, spack.spec.Spec) and query_spec.concrete:
query_spec._concrete):
hash_key = query_spec.dag_hash() hash_key = query_spec.dag_hash()
if hash_key in self._data: if hash_key in self._data:
@ -815,14 +893,26 @@ def query(self, query_spec=any, known=any, installed=True, explicit=any):
# Abstract specs require more work -- currently we test # Abstract specs require more work -- currently we test
# against everything. # against everything.
results = [] results = []
start_date = start_date or datetime.datetime.min
end_date = end_date or datetime.datetime.max
for key, rec in self._data.items(): for key, rec in self._data.items():
if installed is not any and rec.installed != installed: if installed is not any and rec.installed != installed:
continue continue
if explicit is not any and rec.explicit != explicit: if explicit is not any and rec.explicit != explicit:
continue continue
if known is not any and spack.repo.exists( if known is not any and spack.repo.exists(
rec.spec.name) != known: rec.spec.name) != known:
continue continue
inst_date = datetime.datetime.fromtimestamp(
rec.installation_time
)
if not (start_date < inst_date < end_date):
continue
if query_spec is any or rec.spec.satisfies(query_spec): if query_spec is any or rec.spec.satisfies(query_spec):
results.append(rec.spec) results.append(rec.spec)
@ -835,7 +925,8 @@ def query_one(self, query_spec, known=any, installed=True):
query. Returns None if no installed package matches. query. Returns None if no installed package matches.
""" """
concrete_specs = self.query(query_spec, known, installed) concrete_specs = self.query(
query_spec, known=known, installed=installed)
assert len(concrete_specs) <= 1 assert len(concrete_specs) <= 1
return concrete_specs[0] if concrete_specs else None return concrete_specs[0] if concrete_specs else None

View file

@ -62,7 +62,9 @@ def test_query_arguments():
missing=False, missing=False,
unknown=False, unknown=False,
explicit=False, explicit=False,
implicit=False implicit=False,
start_date="2018-02-23",
end_date=None
) )
q_args = query_arguments(args) q_args = query_arguments(args)
@ -72,6 +74,8 @@ def test_query_arguments():
assert q_args['installed'] is True assert q_args['installed'] is True
assert q_args['known'] is any assert q_args['known'] is any
assert q_args['explicit'] is any assert q_args['explicit'] is any
assert 'start_date' in q_args
assert 'end_date' not in q_args
# Check that explicit works correctly # Check that explicit works correctly
args.explicit = True args.explicit = True

View file

@ -26,6 +26,7 @@
These tests check the database is functioning properly, These tests check the database is functioning properly,
both in memory and in its file both in memory and in its file
""" """
import datetime
import multiprocessing import multiprocessing
import os import os
import pytest import pytest
@ -293,6 +294,12 @@ def test_050_basic_query(database):
assert len(install_db.query('mpileaks ^mpich2')) == 1 assert len(install_db.query('mpileaks ^mpich2')) == 1
assert len(install_db.query('mpileaks ^zmpi')) == 1 assert len(install_db.query('mpileaks ^zmpi')) == 1
# Query by date
assert len(install_db.query(start_date=datetime.datetime.min)) == 16
assert len(install_db.query(start_date=datetime.datetime.max)) == 0
assert len(install_db.query(end_date=datetime.datetime.min)) == 0
assert len(install_db.query(end_date=datetime.datetime.max)) == 16
def test_060_remove_and_add_root_package(database): def test_060_remove_and_add_root_package(database):
install_db = database.mock.db install_db = database.mock.db

View file

@ -23,11 +23,18 @@
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
############################################################################## ##############################################################################
import pytest import pytest
from datetime import datetime, timedelta from datetime import datetime, timedelta
import llnl.util.lang
from llnl.util.lang import pretty_date, match_predicate from llnl.util.lang import pretty_date, match_predicate
@pytest.fixture()
def now():
    """Provide the current local time as reference instant for a test."""
    current_time = datetime.now()
    return current_time
def test_pretty_date(): def test_pretty_date():
"""Make sure pretty_date prints the right dates.""" """Make sure pretty_date prints the right dates."""
now = datetime.now() now = datetime.now()
@ -75,6 +82,32 @@ def test_pretty_date():
assert pretty_date(years, now) == "2 years ago" assert pretty_date(years, now) == "2 years ago"
@pytest.mark.parametrize('delta,pretty_string', [
    (timedelta(days=1), 'a day ago'),
    (timedelta(days=1), 'yesterday'),
    (timedelta(days=1), '1 day ago'),
    (timedelta(weeks=1), '1 week ago'),
    (timedelta(weeks=3), '3 weeks ago'),
    (timedelta(days=30), '1 month ago'),
    (timedelta(days=730), '2 years ago'),
])
def test_pretty_string_to_date_delta(now, delta, pretty_string):
    """Relative dates must resolve to ``now`` minus the expected delta."""
    expected = now - delta
    parsed = llnl.util.lang.pretty_string_to_date(pretty_string, now)
    assert expected == parsed
@pytest.mark.parametrize('format,pretty_string', [
    ('%Y', '2018'),
    ('%Y-%m', '2015-03'),
    ('%Y-%m-%d', '2015-03-28'),
])
def test_pretty_string_to_date(now, format, pretty_string):
    """Absolute dates must be parsed exactly like ``datetime.strptime``.

    ``now`` is requested as a fixture argument so that a real ``datetime``
    is handed to the parser; without it the name would resolve to the
    module-level fixture function instead of its value.
    """
    expected = datetime.strptime(pretty_string, format)
    parsed = llnl.util.lang.pretty_string_to_date(pretty_string, now)
    assert expected == parsed
def test_match_predicate(): def test_match_predicate():
matcher = match_predicate(lambda x: True) matcher = match_predicate(lambda x: True)
assert matcher('foo') assert matcher('foo')