database.py: stream of json objects forward compat (#43598)

In the future we may transform the database from a single JSON object to
a stream of JSON objects.

This paves the way for constant time writes and constant time rereads
when only O(1) changes are made. Currently both are linear time.

This commit gives just enough forward compat for Spack to produce a
friendly error when we would move to a stream of json objects, and a db
would look like this:

```json
{"database": {"version": "<something newer>"}}
```
This commit is contained in:
Harmen Stoppels 2024-04-22 09:43:41 +02:00 committed by GitHub
parent a450dd31fa
commit e8c41cdbcb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 42 additions and 15 deletions

View file

@ -25,6 +25,7 @@
import socket
import sys
import time
from json import JSONDecoder
from typing import (
Any,
Callable,
@ -818,7 +819,8 @@ def _read_from_file(self, filename):
"""
try:
with open(filename, "r") as f:
fdata = sjson.load(f)
# In the future we may use a stream of JSON objects, hence `raw_decode` for compat.
fdata, _ = JSONDecoder().raw_decode(f.read())
except Exception as e:
raise CorruptDatabaseError("error parsing database:", str(e)) from e
@ -833,27 +835,24 @@ def check(cond, msg):
# High-level file checks
db = fdata["database"]
check("installs" in db, "no 'installs' in JSON DB.")
check("version" in db, "no 'version' in JSON DB.")
installs = db["installs"]
# TODO: better version checking semantics.
version = vn.Version(db["version"])
if version > _DB_VERSION:
raise InvalidDatabaseVersionError(self, _DB_VERSION, version)
elif version < _DB_VERSION:
if not any(old == version and new == _DB_VERSION for old, new in _SKIP_REINDEX):
tty.warn(
"Spack database version changed from %s to %s. Upgrading."
% (version, _DB_VERSION)
)
elif version < _DB_VERSION and not any(
old == version and new == _DB_VERSION for old, new in _SKIP_REINDEX
):
tty.warn(f"Spack database version changed from {version} to {_DB_VERSION}. Upgrading.")
self.reindex(spack.store.STORE.layout)
installs = dict(
(k, v.to_dict(include_fields=self._record_fields))
for k, v in self._data.items()
(k, v.to_dict(include_fields=self._record_fields)) for k, v in self._data.items()
)
else:
check("installs" in db, "no 'installs' in JSON DB.")
installs = db["installs"]
spec_reader = reader(version)

View file

@ -1106,3 +1106,31 @@ def test_database_construction_doesnt_use_globals(tmpdir, config, nullify_global
lock_cfg = lock_cfg or spack.database.lock_configuration(config)
db = spack.database.Database(str(tmpdir), lock_cfg=lock_cfg)
assert os.path.exists(db.database_directory)
def test_database_read_works_with_trailing_data(tmp_path, default_mock_concretization):
# Populate a database
root = str(tmp_path)
db = spack.database.Database(root)
spec = default_mock_concretization("a")
db.add(spec, directory_layout=None)
specs_in_db = db.query_local()
assert spec in specs_in_db
# Append anything to the end of the database file
with open(db._index_path, "a") as f:
f.write(json.dumps({"hello": "world"}))
# Read the database and check that it ignores the trailing data
assert spack.database.Database(root).query_local() == specs_in_db
def test_database_errors_with_just_a_version_key(tmp_path):
root = str(tmp_path)
db = spack.database.Database(root)
next_version = f"{spack.database._DB_VERSION}.next"
with open(db._index_path, "w") as f:
f.write(json.dumps({"database": {"version": next_version}}))
with pytest.raises(spack.database.InvalidDatabaseVersionError):
spack.database.Database(root).query_local()