Record installation date and time in DB (#7334)

* Added installation date and time to the database

Information on the date and time of installation of a spec is recorded
into the database. The information is retained on reindexing.

* Expose the possibility to query for installation date

The DB can now be queried for specs that have been installed in a given
time window. This query capability is exposed on the command line via two
new options of the `find` command.

* Extended docstring for Database._add

* Use timestamps since the epoch instead of formatted date in the DB

* Allow 'pretty date' formats from command line

* Substituted kwargs with explicit arguments

* Simplified regex for pretty date strings. Added unit tests.
This commit is contained in:
Massimiliano Culpo 2018-03-22 19:07:27 +01:00 committed by Todd Gamblin
parent 6699ba8769
commit 5655895865
6 changed files with 255 additions and 44 deletions

View file

@ -29,7 +29,7 @@
import functools
import collections
import inspect
from datetime import datetime
from datetime import datetime, timedelta
from six import string_types
# Ignore emacs backups when listing modules
@ -442,6 +442,65 @@ def pretty_date(time, now=None):
return str(diff) + " years ago"
def pretty_string_to_date(date_str, now=None):
    """Parse a string representing a date and return a datetime object.

    Args:
        date_str (str): string representing a date. Accepted forms are
            the numeric formats ``YYYY``, ``YYYY-MM`` and ``YYYY-MM-DD``,
            the word ``yesterday``, and *pretty dates* such as
            ``a day ago``, ``an hour ago`` or ``3 weeks ago``. The
            quantity of a pretty date must be ``a``, ``an`` or written
            in digits (``two months ago`` is not understood).
        now (datetime, optional): reference time that relative dates are
            subtracted from. Defaults to ``datetime.now()``.

    Returns:
        (datetime): datetime object corresponding to ``date_str``

    Raises:
        ValueError: if ``date_str`` does not match any supported format
    """
    if now is None:
        now = datetime.now()

    # Raw strings are used for every pattern so that ``\d`` and ``\s`` reach
    # the regex engine instead of being parsed as string escapes.
    pretty_regex = re.compile(
        r'(an?|\d+)\s*(year|month|week|day|hour|minute|second)s?\s*ago')

    def _n_xxx_ago(x):
        how_many, time_period = pretty_regex.match(x).groups()
        how_many = 1 if how_many in ('a', 'an') else int(how_many)

        # timedelta natively supports time periods up to 'weeks'.
        # Months and years are approximated as 30 and 365 days.
        if time_period == 'month':
            how_many *= 30
            time_period = 'day'
        elif time_period == 'year':
            how_many *= 365
            time_period = 'day'

        return now - timedelta(**{time_period + 's': how_many})

    # (pattern, parser) pairs; the first pattern matching the start of
    # ``date_str`` decides how the string is converted.
    parsers = (
        (re.compile(r'^\d{4}$'),
         lambda x: datetime.strptime(x, '%Y')),
        (re.compile(r'^\d{4}-\d{2}$'),
         lambda x: datetime.strptime(x, '%Y-%m')),
        (re.compile(r'^\d{4}-\d{2}-\d{2}$'),
         lambda x: datetime.strptime(x, '%Y-%m-%d')),
        (pretty_regex, _n_xxx_ago),
        (re.compile(r'^yesterday$'),
         lambda x: now - timedelta(days=1)),
    )

    for regexp, parser in parsers:
        if regexp.match(date_str):
            return parser(date_str)

    msg = 'date "{0}" does not match any valid format'.format(date_str)
    raise ValueError(msg)
class RequiredAttributeError(ValueError):
def __init__(self, message):

View file

@ -25,7 +25,9 @@
import sys
import llnl.util.tty as tty
import llnl.util.lang
import spack
import spack.database
import spack.cmd.common.arguments as arguments
from spack.cmd import display_specs
@ -96,6 +98,14 @@ def setup_parser(subparser):
action='store_true',
help='show fully qualified package names')
subparser.add_argument(
'--start-date',
help='earliest date of installation [YYYY-MM-DD]'
)
subparser.add_argument(
'--end-date', help='latest date of installation [YYYY-MM-DD]'
)
arguments.add_common_arguments(subparser, ['constraint'])
@ -114,6 +124,13 @@ def query_arguments(args):
if args.implicit:
explicit = False
q_args = {'installed': installed, 'known': known, "explicit": explicit}
# Time window of installation
for attribute in ('start_date', 'end_date'):
date = getattr(args, attribute)
if date:
q_args[attribute] = llnl.util.lang.pretty_string_to_date(date)
return q_args

View file

@ -39,6 +39,8 @@
filesystem.
"""
import datetime
import time
import os
import sys
import socket
@ -76,6 +78,11 @@
_tracked_deps = ('link', 'run')
def _now():
"""Returns the time since the epoch"""
return time.time()
def _autospec(function):
"""Decorator that automatically converts the argument of a single-arg
function to a Spec."""
@ -103,14 +110,31 @@ class InstallRecord(object):
actually remove from the database until a spec has no installed
dependents left.
Args:
spec (Spec): spec tracked by the install record
path (str): path where the spec has been installed
installed (bool): whether or not the spec is currently installed
ref_count (int): number of specs that depend on this one
explicit (bool, optional): whether or not this spec was explicitly
installed, or pulled-in as a dependency of something else
installation_time (time, optional): time of the installation
"""
def __init__(self, spec, path, installed, ref_count=0, explicit=False):
def __init__(
        self,
        spec,
        path,
        installed,
        ref_count=0,
        explicit=False,
        installation_time=None
):
    """Create an install record for ``spec``.

    Args:
        spec: spec tracked by the install record
        path: path where the spec has been installed; stored as ``str``
        installed: whether the spec is currently installed; stored as
            ``bool``
        ref_count (int): number of specs that depend on this one
        explicit (bool): whether the spec was explicitly installed
        installation_time (optional): time of the installation, as
            returned by ``_now()`` (seconds since the epoch). When
            ``None`` the current time is recorded.
    """
    self.spec = spec
    self.path = str(path)
    self.installed = bool(installed)
    self.ref_count = ref_count
    self.explicit = explicit
    # Compare against None explicitly: a timestamp of 0 (the epoch itself)
    # is falsy but is still a valid value that must not be overwritten.
    if installation_time is None:
        installation_time = _now()
    self.installation_time = installation_time
def to_dict(self):
return {
@ -118,14 +142,15 @@ def to_dict(self):
'path': self.path,
'installed': self.installed,
'ref_count': self.ref_count,
'explicit': self.explicit
'explicit': self.explicit,
'installation_time': self.installation_time
}
@classmethod
def from_dict(cls, spec, dictionary):
d = dictionary
return InstallRecord(spec, d['path'], d['installed'], d['ref_count'],
d.get('explicit', False))
d = dict(dictionary.items())
d.pop('spec', None)
return InstallRecord(spec, **d)
class Database(object):
@ -347,7 +372,7 @@ def check(cond, msg):
def invalid_record(hash_key, error):
msg = ("Invalid record in Spack database: "
"hash: %s, cause: %s: %s")
msg %= (hash_key, type(e).__name__, str(e))
msg %= (hash_key, type(error).__name__, str(error))
raise CorruptDatabaseError(msg, self._index_path)
# Build up the database in three passes:
@ -442,12 +467,18 @@ def _read_suppress_error():
tty.debug(
'RECONSTRUCTING FROM SPEC.YAML: {0}'.format(spec))
explicit = True
inst_time = os.stat(spec.prefix).st_ctime
if old_data is not None:
old_info = old_data.get(spec.dag_hash())
if old_info is not None:
explicit = old_info.explicit
inst_time = old_info.installation_time
self._add(spec, directory_layout, explicit=explicit)
extra_args = {
'explicit': explicit,
'installation_time': inst_time
}
self._add(spec, directory_layout, **extra_args)
processed_specs.add(spec)
@ -479,7 +510,8 @@ def _read_suppress_error():
kwargs = {
'spec': entry.spec,
'directory_layout': layout,
'explicit': entry.explicit
'explicit': entry.explicit,
'installation_time': entry.installation_time # noqa: E501
}
self._add(**kwargs)
processed_specs.add(entry.spec)
@ -579,23 +611,52 @@ def _read(self):
self._write(None, None, None)
self.reindex(spack.store.layout)
def _add(self, spec, directory_layout=None, explicit=False):
def _add(
self,
spec,
directory_layout=None,
explicit=False,
installation_time=None
):
"""Add an install record for this spec to the database.
Assumes spec is installed in ``layout.path_for_spec(spec)``.
Also ensures dependencies are present and updated in the DB as
either intsalled or missing.
either installed or missing.
Args:
spec: spec to be added
directory_layout: layout of the spec installation
**kwargs:
explicit
Possible values: True, False, any
A spec that was installed following a specific user
request is marked as explicit. If instead it was
pulled-in as a dependency of a user requested spec
it's considered implicit.
installation_time
Date and time of installation
"""
if not spec.concrete:
raise NonConcreteSpecAddError(
"Specs added to DB must be concrete.")
# Retrieve optional arguments
installation_time = installation_time or _now()
for dep in spec.dependencies(_tracked_deps):
dkey = dep.dag_hash()
if dkey not in self._data:
self._add(dep, directory_layout, explicit=False)
extra_args = {
'explicit': False,
'installation_time': installation_time
}
self._add(dep, directory_layout, **extra_args)
key = spec.dag_hash()
if key not in self._data:
@ -613,8 +674,13 @@ def _add(self, spec, directory_layout=None, explicit=False):
# Create a new install record with no deps initially.
new_spec = spec.copy(deps=False)
extra_args = {
'explicit': explicit,
'installation_time': installation_time
}
self._data[key] = InstallRecord(
new_spec, path, installed, ref_count=0, explicit=explicit)
new_spec, path, installed, ref_count=0, **extra_args
)
# Connect dependencies from the DB to the new copy.
for name, dep in iteritems(spec.dependencies_dict(_tracked_deps)):
@ -766,45 +832,57 @@ def activated_extensions_for(self, extendee_spec, extensions_layout=None):
continue
# TODO: conditional way to do this instead of catching exceptions
def query(self, query_spec=any, known=any, installed=True, explicit=any):
"""Run a query on the database.
def query(
self,
query_spec=any,
known=any,
installed=True,
explicit=any,
start_date=None,
end_date=None
):
"""Run a query on the database
``query_spec``
Queries iterate through specs in the database and return
those that satisfy the supplied ``query_spec``. If
query_spec is `any`, This will match all specs in the
database. If it is a spec, we'll evaluate
``spec.satisfies(query_spec)``.
Args:
query_spec: queries iterate through specs in the database and
return those that satisfy the supplied ``query_spec``. If
query_spec is `any`, This will match all specs in the
database. If it is a spec, we'll evaluate
``spec.satisfies(query_spec)``
The query can be constrained by two additional attributes:
known (bool or any, optional): Specs that are "known" are those
for which Spack can locate a ``package.py`` file -- i.e.,
Spack "knows" how to install them. Specs that are unknown may
represent packages that existed in a previous version of
Spack, but have since either changed their name or
been removed
``known``
Possible values: True, False, any
installed (bool or any, optional): Specs for which a prefix exists
are "installed". A spec that is NOT installed will be in the
database if some other spec depends on it but its installation
has gone away since Spack installed it.
Specs that are "known" are those for which Spack can
locate a ``package.py`` file -- i.e., Spack "knows" how to
install them. Specs that are unknown may represent
packages that existed in a previous version of Spack, but
have since either changed their name or been removed.
explicit (bool or any, optional): A spec that was installed
following a specific user request is marked as explicit. If
instead it was pulled-in as a dependency of a user requested
spec it's considered implicit.
``installed``
Possible values: True, False, any
start_date (datetime, optional): filters the query discarding
specs that have been installed before ``start_date``.
Specs for which a prefix exists are "installed". A spec
that is NOT installed will be in the database if some
other spec depends on it but its installation has gone
away since Spack installed it.
TODO: Specs are a lot like queries. Should there be a
wildcard spec object, and should specs have attributes
like installed and known that can be queried? Or are
these really special cases that only belong here?
end_date (datetime, optional): filters the query discarding
specs that have been installed after ``end_date``.
Returns:
list of specs that match the query
"""
# TODO: Specs are a lot like queries. Should there be a
# TODO: wildcard spec object, and should specs have attributes
# TODO: like installed and known that can be queried? Or are
# TODO: these really special cases that only belong here?
with self.read_transaction():
# Just look up concrete specs with hashes; no fancy search.
if (isinstance(query_spec, spack.spec.Spec) and
query_spec._concrete):
if isinstance(query_spec, spack.spec.Spec) and query_spec.concrete:
hash_key = query_spec.dag_hash()
if hash_key in self._data:
@ -815,14 +893,26 @@ def query(self, query_spec=any, known=any, installed=True, explicit=any):
# Abstract specs require more work -- currently we test
# against everything.
results = []
start_date = start_date or datetime.datetime.min
end_date = end_date or datetime.datetime.max
for key, rec in self._data.items():
if installed is not any and rec.installed != installed:
continue
if explicit is not any and rec.explicit != explicit:
continue
if known is not any and spack.repo.exists(
rec.spec.name) != known:
continue
inst_date = datetime.datetime.fromtimestamp(
rec.installation_time
)
if not (start_date < inst_date < end_date):
continue
if query_spec is any or rec.spec.satisfies(query_spec):
results.append(rec.spec)
@ -835,7 +925,8 @@ def query_one(self, query_spec, known=any, installed=True):
query. Returns None if no installed package matches.
"""
concrete_specs = self.query(query_spec, known, installed)
concrete_specs = self.query(
query_spec, known=known, installed=installed)
assert len(concrete_specs) <= 1
return concrete_specs[0] if concrete_specs else None

View file

@ -62,7 +62,9 @@ def test_query_arguments():
missing=False,
unknown=False,
explicit=False,
implicit=False
implicit=False,
start_date="2018-02-23",
end_date=None
)
q_args = query_arguments(args)
@ -72,6 +74,8 @@ def test_query_arguments():
assert q_args['installed'] is True
assert q_args['known'] is any
assert q_args['explicit'] is any
assert 'start_date' in q_args
assert 'end_date' not in q_args
# Check that explicit works correctly
args.explicit = True

View file

@ -26,6 +26,7 @@
These tests check the database is functioning properly,
both in memory and in its file
"""
import datetime
import multiprocessing
import os
import pytest
@ -293,6 +294,12 @@ def test_050_basic_query(database):
assert len(install_db.query('mpileaks ^mpich2')) == 1
assert len(install_db.query('mpileaks ^zmpi')) == 1
# Query by date
assert len(install_db.query(start_date=datetime.datetime.min)) == 16
assert len(install_db.query(start_date=datetime.datetime.max)) == 0
assert len(install_db.query(end_date=datetime.datetime.min)) == 0
assert len(install_db.query(end_date=datetime.datetime.max)) == 16
def test_060_remove_and_add_root_package(database):
install_db = database.mock.db

View file

@ -23,11 +23,18 @@
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
##############################################################################
import pytest
from datetime import datetime, timedelta
import llnl.util.lang
from llnl.util.lang import pretty_date, match_predicate
@pytest.fixture()
def now():
    """Provide the current local date and time to the requesting test."""
    current_time = datetime.now()
    return current_time
def test_pretty_date():
"""Make sure pretty_date prints the right dates."""
now = datetime.now()
@ -75,6 +82,32 @@ def test_pretty_date():
assert pretty_date(years, now) == "2 years ago"
@pytest.mark.parametrize('delta,pretty_string', [
    (timedelta(days=1), 'a day ago'),
    (timedelta(days=1), 'yesterday'),
    (timedelta(days=1), '1 day ago'),
    (timedelta(weeks=1), '1 week ago'),
    (timedelta(weeks=3), '3 weeks ago'),
    (timedelta(days=30), '1 month ago'),
    (timedelta(days=730), '2 years ago'),
])
def test_pretty_string_to_date_delta(now, delta, pretty_string):
    """Relative date strings resolve to ``now`` minus the expected delta."""
    parsed = llnl.util.lang.pretty_string_to_date(pretty_string, now)
    expected = now - delta
    assert expected == parsed
@pytest.mark.parametrize('date_format,pretty_string', [
    ('%Y', '2018'),
    ('%Y-%m', '2015-03'),
    ('%Y-%m-%d', '2015-03-28'),
])
def test_pretty_string_to_date(now, date_format, pretty_string):
    """Numeric date strings are parsed like the matching strptime format.

    The ``now`` fixture is requested explicitly so that a real datetime,
    and not the fixture function object, is passed as the reference time.
    """
    t1 = datetime.strptime(pretty_string, date_format)
    t2 = llnl.util.lang.pretty_string_to_date(pretty_string, now)
    assert t1 == t2
def test_match_predicate():
matcher = match_predicate(lambda x: True)
assert matcher('foo')