From e8c41cdbcb1313eff006e737165ca36611da850c Mon Sep 17 00:00:00 2001 From: Harmen Stoppels Date: Mon, 22 Apr 2024 09:43:41 +0200 Subject: [PATCH] database.py: stream of json objects forward compat (#43598) In the future we may transform the database from a single JSON object to a stream of JSON objects. This paves the way for constant time writes and constant time rereads when only O(1) changes are made. Currently both are linear time. This commit gives just enough forward compat for Spack to produce a friendly error when we would move to a stream of json objects, and a db would look like this: ```json {"database": {"version": ""}} ``` --- lib/spack/spack/database.py | 29 ++++++++++++++--------------- lib/spack/spack/test/database.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 15 deletions(-) diff --git a/lib/spack/spack/database.py b/lib/spack/spack/database.py index e2f9e57e82..3c7dff96db 100644 --- a/lib/spack/spack/database.py +++ b/lib/spack/spack/database.py @@ -25,6 +25,7 @@ import socket import sys import time +from json import JSONDecoder from typing import ( Any, Callable, @@ -818,7 +819,8 @@ def _read_from_file(self, filename): """ try: with open(filename, "r") as f: - fdata = sjson.load(f) + # In the future we may use a stream of JSON objects, hence `raw_decode` for compat. + fdata, _ = JSONDecoder().raw_decode(f.read()) except Exception as e: raise CorruptDatabaseError("error parsing database:", str(e)) from e @@ -833,27 +835,24 @@ def check(cond, msg): # High-level file checks db = fdata["database"] - check("installs" in db, "no 'installs' in JSON DB.") check("version" in db, "no 'version' in JSON DB.") - installs = db["installs"] - # TODO: better version checking semantics. version = vn.Version(db["version"]) if version > _DB_VERSION: raise InvalidDatabaseVersionError(self, _DB_VERSION, version) - elif version < _DB_VERSION: - if not any(old == version and new == _DB_VERSION for old, new in _SKIP_REINDEX): - tty.warn( - "Spack database version changed from %s to %s. Upgrading." - % (version, _DB_VERSION) - ) + elif version < _DB_VERSION and not any( + old == version and new == _DB_VERSION for old, new in _SKIP_REINDEX + ): + tty.warn(f"Spack database version changed from {version} to {_DB_VERSION}. Upgrading.") - self.reindex(spack.store.STORE.layout) - installs = dict( - (k, v.to_dict(include_fields=self._record_fields)) - for k, v in self._data.items() - ) + self.reindex(spack.store.STORE.layout) + installs = dict( + (k, v.to_dict(include_fields=self._record_fields)) for k, v in self._data.items() + ) + else: + check("installs" in db, "no 'installs' in JSON DB.") + installs = db["installs"] spec_reader = reader(version) diff --git a/lib/spack/spack/test/database.py b/lib/spack/spack/test/database.py index 0b611e93e3..b981191676 100644 --- a/lib/spack/spack/test/database.py +++ b/lib/spack/spack/test/database.py @@ -1106,3 +1106,31 @@ def test_database_construction_doesnt_use_globals(tmpdir, config, nullify_global lock_cfg = lock_cfg or spack.database.lock_configuration(config) db = spack.database.Database(str(tmpdir), lock_cfg=lock_cfg) assert os.path.exists(db.database_directory) + + +def test_database_read_works_with_trailing_data(tmp_path, default_mock_concretization): + # Populate a database + root = str(tmp_path) + db = spack.database.Database(root) + spec = default_mock_concretization("a") + db.add(spec, directory_layout=None) + specs_in_db = db.query_local() + assert spec in specs_in_db + + # Append anything to the end of the database file + with open(db._index_path, "a") as f: + f.write(json.dumps({"hello": "world"})) + + # Read the database and check that it ignores the trailing data + assert spack.database.Database(root).query_local() == specs_in_db + + +def test_database_errors_with_just_a_version_key(tmp_path): + root = str(tmp_path) + db = spack.database.Database(root) + next_version = f"{spack.database._DB_VERSION}.next" + with open(db._index_path, "w") as f: + f.write(json.dumps({"database": {"version": next_version}})) + + with pytest.raises(spack.database.InvalidDatabaseVersionError): + spack.database.Database(root).query_local()