From 56558958652f772ba07be011f493b8c733404dfd Mon Sep 17 00:00:00 2001 From: Massimiliano Culpo Date: Thu, 22 Mar 2018 19:07:27 +0100 Subject: [PATCH] Record installation date and time in DB (#7334) * Added installation date and time to the database Information on the date and time of installation of a spec is recorded into the database. The information is retained on reindexing. * Expose the possibility to query for installation date The DB can now be queried for specs that have been installed in a given time window. This query possibility is exposed to command line via two new options of the `find` command. * Extended docstring for Database._add * Use timestamps since the epoch instead of formatted date in the DB * Allow 'pretty date' formats from command line * Substituted kwargs with explicit arguments * Simplified regex for pretty date strings. Added unit tests. --- lib/spack/llnl/util/lang.py | 61 ++++++++- lib/spack/spack/cmd/find.py | 17 +++ lib/spack/spack/database.py | 175 +++++++++++++++++++------ lib/spack/spack/test/cmd/find.py | 6 +- lib/spack/spack/test/database.py | 7 + lib/spack/spack/test/llnl/util/lang.py | 33 +++++ 6 files changed, 255 insertions(+), 44 deletions(-) diff --git a/lib/spack/llnl/util/lang.py b/lib/spack/llnl/util/lang.py index e586bc268d..f8cee2892b 100644 --- a/lib/spack/llnl/util/lang.py +++ b/lib/spack/llnl/util/lang.py @@ -29,7 +29,7 @@ import functools import collections import inspect -from datetime import datetime +from datetime import datetime, timedelta from six import string_types # Ignore emacs backups when listing modules @@ -442,6 +442,65 @@ def pretty_date(time, now=None): return str(diff) + " years ago" +def pretty_string_to_date(date_str, now=None): + """Parses a string representing a date and returns a datetime object. + + Args: + date_str (str): string representing a date. 
This string might be + in different format (like ``YYYY``, ``YYYY-MM``, ``YYYY-MM-DD``) + or be a *pretty date* (like ``yesterday`` or ``two months ago``) + + Returns: + (datetime): datetime object corresponding to ``date_str`` + """ + + pattern = {} + + now = now or datetime.now() + + # datetime formats + pattern[re.compile('^\d{4}$')] = lambda x: datetime.strptime(x, '%Y') + pattern[re.compile('^\d{4}-\d{2}$')] = lambda x: datetime.strptime( + x, '%Y-%m' + ) + pattern[re.compile('^\d{4}-\d{2}-\d{2}$')] = lambda x: datetime.strptime( + x, '%Y-%m-%d' + ) + + pretty_regex = re.compile( + r'(a|\d+)\s*(year|month|week|day|hour|minute|second)s?\s*ago') + + def _n_xxx_ago(x): + how_many, time_period = pretty_regex.search(x).groups() + + how_many = 1 if how_many == 'a' else int(how_many) + + # timedelta natively supports time periods up to 'weeks'. + # To apply month or year we convert to 30 and 365 days + if time_period == 'month': + how_many *= 30 + time_period = 'day' + elif time_period == 'year': + how_many *= 365 + time_period = 'day' + + kwargs = {(time_period + 's'): how_many} + return now - timedelta(**kwargs) + + pattern[pretty_regex] = _n_xxx_ago + + # yesterday + callback = lambda x: now - timedelta(days=1) + pattern[re.compile('^yesterday$')] = callback + + for regexp, parser in pattern.items(): + if bool(regexp.match(date_str)): + return parser(date_str) + + msg = 'date "{0}" does not match any valid format'.format(date_str) + raise ValueError(msg) + + class RequiredAttributeError(ValueError): def __init__(self, message): diff --git a/lib/spack/spack/cmd/find.py b/lib/spack/spack/cmd/find.py index 71102563c3..41bc6603ed 100644 --- a/lib/spack/spack/cmd/find.py +++ b/lib/spack/spack/cmd/find.py @@ -25,7 +25,9 @@ import sys import llnl.util.tty as tty +import llnl.util.lang import spack +import spack.database import spack.cmd.common.arguments as arguments from spack.cmd import display_specs @@ -96,6 +98,14 @@ def setup_parser(subparser): action='store_true', 
help='show fully qualified package names') + subparser.add_argument( + '--start-date', + help='earliest date of installation [YYYY-MM-DD]' + ) + subparser.add_argument( + '--end-date', help='latest date of installation [YYYY-MM-DD]' + ) + arguments.add_common_arguments(subparser, ['constraint']) @@ -114,6 +124,13 @@ def query_arguments(args): if args.implicit: explicit = False q_args = {'installed': installed, 'known': known, "explicit": explicit} + + # Time window of installation + for attribute in ('start_date', 'end_date'): + date = getattr(args, attribute) + if date: + q_args[attribute] = llnl.util.lang.pretty_string_to_date(date) + return q_args diff --git a/lib/spack/spack/database.py b/lib/spack/spack/database.py index fd4aae00cd..c857802ab9 100644 --- a/lib/spack/spack/database.py +++ b/lib/spack/spack/database.py @@ -39,6 +39,8 @@ filesystem. """ +import datetime +import time import os import sys import socket @@ -76,6 +78,11 @@ _tracked_deps = ('link', 'run') +def _now(): + """Returns the time since the epoch""" + return time.time() + + def _autospec(function): """Decorator that automatically converts the argument of a single-arg function to a Spec.""" @@ -103,14 +110,31 @@ class InstallRecord(object): actually remove from the database until a spec has no installed dependents left. 
+ Args: + spec (Spec): spec tracked by the install record + path (str): path where the spec has been installed + installed (bool): whether or not the spec is currently installed + ref_count (int): number of specs that depend on this one + explicit (bool, optional): whether or not this spec was explicitly + installed, or pulled-in as a dependency of something else + installation_time (time, optional): time of the installation """ - def __init__(self, spec, path, installed, ref_count=0, explicit=False): + def __init__( + self, + spec, + path, + installed, + ref_count=0, + explicit=False, + installation_time=None + ): self.spec = spec self.path = str(path) self.installed = bool(installed) self.ref_count = ref_count self.explicit = explicit + self.installation_time = installation_time or _now() def to_dict(self): return { @@ -118,14 +142,15 @@ def to_dict(self): 'path': self.path, 'installed': self.installed, 'ref_count': self.ref_count, - 'explicit': self.explicit + 'explicit': self.explicit, + 'installation_time': self.installation_time } @classmethod def from_dict(cls, spec, dictionary): - d = dictionary - return InstallRecord(spec, d['path'], d['installed'], d['ref_count'], - d.get('explicit', False)) + d = dict(dictionary.items()) + d.pop('spec', None) + return InstallRecord(spec, **d) class Database(object): @@ -347,7 +372,7 @@ def check(cond, msg): def invalid_record(hash_key, error): msg = ("Invalid record in Spack database: " "hash: %s, cause: %s: %s") - msg %= (hash_key, type(e).__name__, str(e)) + msg %= (hash_key, type(error).__name__, str(error)) raise CorruptDatabaseError(msg, self._index_path) # Build up the database in three passes: @@ -442,12 +467,18 @@ def _read_suppress_error(): tty.debug( 'RECONSTRUCTING FROM SPEC.YAML: {0}'.format(spec)) explicit = True + inst_time = os.stat(spec.prefix).st_ctime if old_data is not None: old_info = old_data.get(spec.dag_hash()) if old_info is not None: explicit = old_info.explicit + inst_time = 
old_info.installation_time - self._add(spec, directory_layout, explicit=explicit) + extra_args = { + 'explicit': explicit, + 'installation_time': inst_time + } + self._add(spec, directory_layout, **extra_args) processed_specs.add(spec) @@ -479,7 +510,8 @@ def _read_suppress_error(): kwargs = { 'spec': entry.spec, 'directory_layout': layout, - 'explicit': entry.explicit + 'explicit': entry.explicit, + 'installation_time': entry.installation_time # noqa: E501 } self._add(**kwargs) processed_specs.add(entry.spec) @@ -579,23 +611,52 @@ def _read(self): self._write(None, None, None) self.reindex(spack.store.layout) - def _add(self, spec, directory_layout=None, explicit=False): + def _add( + self, + spec, + directory_layout=None, + explicit=False, + installation_time=None + ): """Add an install record for this spec to the database. Assumes spec is installed in ``layout.path_for_spec(spec)``. Also ensures dependencies are present and updated in the DB as - either intsalled or missing. + either installed or missing. + + Args: + spec: spec to be added + directory_layout: layout of the spec installation + optional arguments: + + explicit + Possible values: True, False + + A spec that was installed following a specific user + request is marked as explicit. If instead it was + pulled-in as a dependency of a user requested spec + it's considered implicit. 
+ + installation_time + Time of installation, as seconds since the epoch """ if not spec.concrete: raise NonConcreteSpecAddError( "Specs added to DB must be concrete.") + # Retrieve optional arguments + installation_time = installation_time or _now() + for dep in spec.dependencies(_tracked_deps): dkey = dep.dag_hash() if dkey not in self._data: - self._add(dep, directory_layout, explicit=False) + extra_args = { + 'explicit': False, + 'installation_time': installation_time + } + self._add(dep, directory_layout, **extra_args) key = spec.dag_hash() if key not in self._data: @@ -613,8 +674,13 @@ def _add(self, spec, directory_layout=None, explicit=False): # Create a new install record with no deps initially. new_spec = spec.copy(deps=False) + extra_args = { + 'explicit': explicit, + 'installation_time': installation_time + } self._data[key] = InstallRecord( - new_spec, path, installed, ref_count=0, explicit=explicit) + new_spec, path, installed, ref_count=0, **extra_args + ) # Connect dependencies from the DB to the new copy. for name, dep in iteritems(spec.dependencies_dict(_tracked_deps)): @@ -766,45 +832,57 @@ def activated_extensions_for(self, extendee_spec, extensions_layout=None): continue # TODO: conditional way to do this instead of catching exceptions - def query(self, query_spec=any, known=any, installed=True, explicit=any): - """Run a query on the database. + def query( + self, + query_spec=any, + known=any, + installed=True, + explicit=any, + start_date=None, + end_date=None + ): + """Run a query on the database - ``query_spec`` - Queries iterate through specs in the database and return - those that satisfy the supplied ``query_spec``. If - query_spec is `any`, This will match all specs in the - database. If it is a spec, we'll evaluate - ``spec.satisfies(query_spec)``. + Args: + query_spec: queries iterate through specs in the database and + return those that satisfy the supplied ``query_spec``. If + query_spec is `any`, this will match all specs in the + database. 
If it is a spec, we'll evaluate + ``spec.satisfies(query_spec)`` - The query can be constrained by two additional attributes: + known (bool or any, optional): Specs that are "known" are those + for which Spack can locate a ``package.py`` file -- i.e., + Spack "knows" how to install them. Specs that are unknown may + represent packages that existed in a previous version of + Spack, but have since either changed their name or + been removed - ``known`` - Possible values: True, False, any + installed (bool or any, optional): Specs for which a prefix exists + are "installed". A spec that is NOT installed will be in the + database if some other spec depends on it but its installation + has gone away since Spack installed it. - Specs that are "known" are those for which Spack can - locate a ``package.py`` file -- i.e., Spack "knows" how to - install them. Specs that are unknown may represent - packages that existed in a previous version of Spack, but - have since either changed their name or been removed. + explicit (bool or any, optional): A spec that was installed + following a specific user request is marked as explicit. If + instead it was pulled-in as a dependency of a user requested + spec it's considered implicit. - ``installed`` - Possible values: True, False, any + start_date (datetime, optional): filters the query discarding + specs that have been installed at or before ``start_date``. - Specs for which a prefix exists are "installed". A spec - that is NOT installed will be in the database if some - other spec depends on it but its installation has gone - away since Spack installed it. - - TODO: Specs are a lot like queries. Should there be a - wildcard spec object, and should specs have attributes - like installed and known that can be queried? Or are - these really special cases that only belong here? + end_date (datetime, optional): filters the query discarding + specs that have been installed at or after ``end_date``. 
+ Returns: + list of specs that match the query """ + # TODO: Specs are a lot like queries. Should there be a + # TODO: wildcard spec object, and should specs have attributes + # TODO: like installed and known that can be queried? Or are + # TODO: these really special cases that only belong here? with self.read_transaction(): # Just look up concrete specs with hashes; no fancy search. - if (isinstance(query_spec, spack.spec.Spec) and - query_spec._concrete): + if isinstance(query_spec, spack.spec.Spec) and query_spec.concrete: hash_key = query_spec.dag_hash() if hash_key in self._data: @@ -815,14 +893,26 @@ def query(self, query_spec=any, known=any, installed=True, explicit=any): # Abstract specs require more work -- currently we test # against everything. results = [] + start_date = start_date or datetime.datetime.min + end_date = end_date or datetime.datetime.max + for key, rec in self._data.items(): if installed is not any and rec.installed != installed: continue + if explicit is not any and rec.explicit != explicit: continue + if known is not any and spack.repo.exists( rec.spec.name) != known: continue + + inst_date = datetime.datetime.fromtimestamp( + rec.installation_time + ) + if not (start_date < inst_date < end_date): + continue + if query_spec is any or rec.spec.satisfies(query_spec): results.append(rec.spec) @@ -835,7 +925,8 @@ def query_one(self, query_spec, known=any, installed=True): query. Returns None if no installed package matches. 
""" - concrete_specs = self.query(query_spec, known, installed) + concrete_specs = self.query( + query_spec, known=known, installed=installed) assert len(concrete_specs) <= 1 return concrete_specs[0] if concrete_specs else None diff --git a/lib/spack/spack/test/cmd/find.py b/lib/spack/spack/test/cmd/find.py index b946141a87..0e88aadd43 100644 --- a/lib/spack/spack/test/cmd/find.py +++ b/lib/spack/spack/test/cmd/find.py @@ -62,7 +62,9 @@ def test_query_arguments(): missing=False, unknown=False, explicit=False, - implicit=False + implicit=False, + start_date="2018-02-23", + end_date=None ) q_args = query_arguments(args) @@ -72,6 +74,8 @@ def test_query_arguments(): assert q_args['installed'] is True assert q_args['known'] is any assert q_args['explicit'] is any + assert 'start_date' in q_args + assert 'end_date' not in q_args # Check that explicit works correctly args.explicit = True diff --git a/lib/spack/spack/test/database.py b/lib/spack/spack/test/database.py index 4c17fc8607..99cda8dd81 100644 --- a/lib/spack/spack/test/database.py +++ b/lib/spack/spack/test/database.py @@ -26,6 +26,7 @@ These tests check the database is functioning properly, both in memory and in its file """ +import datetime import multiprocessing import os import pytest @@ -293,6 +294,12 @@ def test_050_basic_query(database): assert len(install_db.query('mpileaks ^mpich2')) == 1 assert len(install_db.query('mpileaks ^zmpi')) == 1 + # Query by date + assert len(install_db.query(start_date=datetime.datetime.min)) == 16 + assert len(install_db.query(start_date=datetime.datetime.max)) == 0 + assert len(install_db.query(end_date=datetime.datetime.min)) == 0 + assert len(install_db.query(end_date=datetime.datetime.max)) == 16 + def test_060_remove_and_add_root_package(database): install_db = database.mock.db diff --git a/lib/spack/spack/test/llnl/util/lang.py b/lib/spack/spack/test/llnl/util/lang.py index 37dc01ce53..8b97c175b7 100644 --- a/lib/spack/spack/test/llnl/util/lang.py +++ 
b/lib/spack/spack/test/llnl/util/lang.py @@ -23,11 +23,18 @@ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ############################################################################## import pytest + from datetime import datetime, timedelta +import llnl.util.lang from llnl.util.lang import pretty_date, match_predicate +@pytest.fixture() +def now(): + return datetime.now() + + def test_pretty_date(): """Make sure pretty_date prints the right dates.""" now = datetime.now() @@ -75,6 +82,32 @@ def test_pretty_date(): assert pretty_date(years, now) == "2 years ago" +@pytest.mark.parametrize('delta,pretty_string', [ + (timedelta(days=1), 'a day ago'), + (timedelta(days=1), 'yesterday'), + (timedelta(days=1), '1 day ago'), + (timedelta(weeks=1), '1 week ago'), + (timedelta(weeks=3), '3 weeks ago'), + (timedelta(days=30), '1 month ago'), + (timedelta(days=730), '2 years ago'), +]) +def test_pretty_string_to_date_delta(now, delta, pretty_string): + t1 = now - delta + t2 = llnl.util.lang.pretty_string_to_date(pretty_string, now) + assert t1 == t2 + + +@pytest.mark.parametrize('format,pretty_string', [ + ('%Y', '2018'), + ('%Y-%m', '2015-03'), + ('%Y-%m-%d', '2015-03-28'), +]) +def test_pretty_string_to_date(format, pretty_string): + t1 = datetime.strptime(pretty_string, format) + t2 = llnl.util.lang.pretty_string_to_date(pretty_string, now) + assert t1 == t2 + + def test_match_predicate(): matcher = match_predicate(lambda x: True) assert matcher('foo')