refactor: change cache schema to slot-based

This commit is contained in:
2026-04-06 23:08:57 +02:00
parent 92860f0d30
commit f175eda57e
9 changed files with 594 additions and 264 deletions
+155 -29
View File
@@ -7,6 +7,8 @@ import pytest
from lrx_cli.cache import (
CacheEngine,
SLOT_SYNCED,
SLOT_UNSYNCED,
_generate_key,
)
from lrx_cli.config import DURATION_TOLERANCE_MS
@@ -67,10 +69,10 @@ def test_generate_key_raises_when_metadata_missing() -> None:
def test_migrate_adds_confidence_version_and_boosts_unsynced(tmp_path: Path) -> None:
"""Legacy cache without confidence_version is migrated in-place.
"""Legacy single-row cache is migrated to slot rows.
Expected behavior:
- add confidence_version column
- add positive_kind and confidence_version
- boost SUCCESS_UNSYNCED confidence by +10 with cap at 100
- keep SUCCESS_SYNCED confidence unchanged
"""
@@ -111,16 +113,107 @@ def test_migrate_adds_confidence_version_and_boosts_unsynced(tmp_path: Path) ->
with sqlite3.connect(db_path) as conn:
cols = {r[1] for r in conn.execute("PRAGMA table_info(cache)").fetchall()}
rows = conn.execute(
"SELECT key, status, confidence, confidence_version FROM cache ORDER BY key"
"SELECT key, positive_kind, status, confidence, confidence_version FROM cache ORDER BY key, positive_kind"
).fetchall()
assert "positive_kind" in cols
assert "confidence_version" in cols
by_key = {
k: (status, confidence, version) for k, status, confidence, version in rows
(k, slot): (status, confidence, version)
for k, slot, status, confidence, version in rows
}
assert by_key["u1"] == ("SUCCESS_UNSYNCED", 95.0, 1)
assert by_key["u2"] == ("SUCCESS_UNSYNCED", 100.0, 1)
assert by_key["s1"] == ("SUCCESS_SYNCED", 70.0, 1)
assert by_key[("u1", SLOT_UNSYNCED)] == ("SUCCESS_UNSYNCED", 95.0, 1)
assert by_key[("u2", SLOT_UNSYNCED)] == ("SUCCESS_UNSYNCED", 100.0, 1)
assert by_key[("s1", SLOT_SYNCED)] == ("SUCCESS_SYNCED", 70.0, 1)
def test_migrate_negative_row_splits_into_two_slot_rows(tmp_path: Path) -> None:
    """A legacy negative (NOT_FOUND) row is duplicated into one row per slot."""
    db_path = tmp_path / "legacy-negative.db"

    # Build a legacy single-row schema (no positive_kind column) with one
    # negative entry, exactly as an old client would have written it.
    create_sql = """
        CREATE TABLE cache (
            key TEXT PRIMARY KEY,
            source TEXT NOT NULL,
            status TEXT NOT NULL,
            lyrics TEXT,
            created_at INTEGER NOT NULL,
            expires_at INTEGER,
            artist TEXT,
            title TEXT,
            album TEXT,
            length INTEGER,
            confidence REAL
        )
    """
    seed_sql = """
        INSERT INTO cache
        (key, source, status, lyrics, created_at, expires_at, artist, title, album, length, confidence)
        VALUES
        ('n1', 's1', 'NOT_FOUND', NULL, 1, NULL, 'A', 'T', 'AL', 180000, 0.0)
    """
    with sqlite3.connect(db_path) as legacy:
        legacy.execute(create_sql)
        legacy.execute(seed_sql)
        legacy.commit()

    # Opening the engine triggers the in-place migration.
    CacheEngine(str(db_path))

    with sqlite3.connect(db_path) as migrated:
        rows = migrated.execute(
            "SELECT key, positive_kind, status FROM cache ORDER BY positive_kind"
        ).fetchall()

    expected = [
        ("n1", SLOT_SYNCED, "NOT_FOUND"),
        ("n1", SLOT_UNSYNCED, "NOT_FOUND"),
    ]
    assert rows == expected
def test_migrate_normalizes_old_slot_spelling(tmp_path: Path) -> None:
    """Rows written with the old 'SYNCHED'/'UNSYNCHED' slot spelling are renamed."""
    db_path = tmp_path / "slot-spelling.db"

    # Slot-based schema is already present, but positive_kind uses the
    # obsolete spelling that the migration must rewrite.
    schema_sql = """
        CREATE TABLE cache (
            key TEXT NOT NULL,
            positive_kind TEXT NOT NULL,
            source TEXT NOT NULL,
            status TEXT NOT NULL,
            lyrics TEXT,
            created_at INTEGER NOT NULL,
            expires_at INTEGER,
            artist TEXT,
            title TEXT,
            album TEXT,
            length INTEGER,
            confidence REAL,
            confidence_version INTEGER,
            PRIMARY KEY (key, positive_kind)
        )
    """
    seed_sql = """
        INSERT INTO cache
        (key, positive_kind, source, status, lyrics, created_at, expires_at, artist, title, album, length, confidence, confidence_version)
        VALUES
        ('k1', 'SYNCHED', 's1', 'SUCCESS_SYNCED', 'l1', 1, NULL, 'A', 'T', 'AL', 180000, 80.0, 1),
        ('k1', 'UNSYNCHED', 's1', 'SUCCESS_UNSYNCED', 'l2', 1, NULL, 'A', 'T', 'AL', 180000, 70.0, 1)
    """
    with sqlite3.connect(db_path) as seed:
        seed.execute(schema_sql)
        seed.execute(seed_sql)
        seed.commit()

    # Constructing the engine runs the normalization migration.
    CacheEngine(str(db_path))

    with sqlite3.connect(db_path) as check:
        slots = check.execute(
            "SELECT positive_kind FROM cache ORDER BY positive_kind"
        ).fetchall()
    assert slots == [(SLOT_SYNCED,), (SLOT_UNSYNCED,)]
def test_set_and_get_roundtrip_with_ttl(
@@ -136,9 +229,10 @@ def test_set_and_get_roundtrip_with_ttl(
ttl_seconds=120,
)
cached = cache_db.get(track, "lrclib")
cached_rows = cache_db.get_all(track, "lrclib")
assert cached is not None
assert len(cached_rows) == 1
cached = cached_rows[0]
assert cached.status is CacheStatus.SUCCESS_SYNCED
assert str(cached.lyrics) == "[00:01.00]line"
assert cached.source == "lrclib"
@@ -158,12 +252,29 @@ def test_get_expired_entry_returns_none_and_removes_row(
)
monkeypatch.setattr("lrx_cli.cache.time.time", lambda: 2_000_020)
cached = cache_db.get(track, "netease")
cached_rows = cache_db.get_all(track, "netease")
assert cached is None
assert cached_rows == []
assert cache_db.query_all() == []
def test_set_negative_without_slot_writes_both_slots(cache_db: CacheEngine) -> None:
    """A negative result stored without an explicit slot lands in both slots."""
    track = _track()
    negative = _result(CacheStatus.NOT_FOUND, None, "src")
    cache_db.set(track, "src", negative, ttl_seconds=60)

    with sqlite3.connect(cache_db.db_path) as conn:
        stored = conn.execute(
            "SELECT positive_kind, status FROM cache ORDER BY positive_kind"
        ).fetchall()

    not_found = CacheStatus.NOT_FOUND.value
    assert stored == [(SLOT_SYNCED, not_found), (SLOT_UNSYNCED, not_found)]
def test_get_backfills_missing_length_when_track_provides_it(
cache_db: CacheEngine,
) -> None:
@@ -187,9 +298,9 @@ def test_get_backfills_missing_length_when_track_provides_it(
album=None,
length=200000,
)
cached = cache_db.get(track_with_length, "spotify")
cached_rows = cache_db.get_all(track_with_length, "spotify")
assert cached is not None
assert cached_rows
with sqlite3.connect(cache_db.db_path) as conn:
row = conn.execute("SELECT length FROM cache LIMIT 1").fetchone()
@@ -268,22 +379,6 @@ def test_prune_removes_only_expired_rows(
assert rows[0]["source"] == "s-active"
def test_find_best_positive_uses_exact_match_and_prefers_synced(
    cache_db: CacheEngine,
) -> None:
    """With both slots populated, find_best_positive returns the synced row."""
    track = _track(artist="Artist", title="Song", album="Album")

    # Seed one unsynced and one synced positive for the same track.
    for source, status, lyrics in (
        ("s1", CacheStatus.SUCCESS_UNSYNCED, "u"),
        ("s2", CacheStatus.SUCCESS_SYNCED, "s"),
    ):
        cache_db.set(track, source, _result(status, lyrics, source))

    best = cache_db.find_best_positive(track, CacheStatus.SUCCESS_SYNCED)

    assert best is not None
    assert best.status is CacheStatus.SUCCESS_SYNCED
    assert str(best.lyrics) == "s"
    # find_best_positive always reports cache-search source
    assert best.source == "cache-search"
def test_find_best_positive_returns_status_specific_results(
cache_db: CacheEngine,
) -> None:
@@ -297,6 +392,7 @@ def test_find_best_positive_returns_status_specific_results(
assert best_synced is not None
assert best_synced.status is CacheStatus.SUCCESS_SYNCED
assert str(best_synced.lyrics) == "s"
assert best_synced.source == "cache-search"
best_unsynced = cache_db.find_best_positive(track, CacheStatus.SUCCESS_UNSYNCED)
assert best_unsynced is not None
@@ -395,6 +491,34 @@ def test_update_confidence_targets_specific_source(cache_db: CacheEngine) -> Non
assert rows["s2"]["confidence"] == 100.0 # unchanged default
def test_update_confidence_updates_both_slots_for_same_source(
    cache_db: CacheEngine,
) -> None:
    """update_confidence touches every slot row attributed to the source."""
    track = _track(artist="A", title="T", album="AL")

    # One positive per slot, both written by the same source.
    slot_payloads = (
        (SLOT_SYNCED, CacheStatus.SUCCESS_SYNCED, "sync"),
        (SLOT_UNSYNCED, CacheStatus.SUCCESS_UNSYNCED, "unsync"),
    )
    for slot, status, lyrics in slot_payloads:
        cache_db.set(
            track,
            "src",
            _result(status, lyrics, "src"),
            positive_kind=slot,
        )

    # Both slot rows must be counted as updated.
    assert cache_db.update_confidence(track, 66.0, "src") == 2

    with sqlite3.connect(cache_db.db_path) as conn:
        rows = conn.execute(
            "SELECT positive_kind, confidence FROM cache WHERE source = 'src' ORDER BY positive_kind"
        ).fetchall()
    assert rows == [(SLOT_SYNCED, 66.0), (SLOT_UNSYNCED, 66.0)]
def test_update_confidence_returns_zero_for_missing_source(
cache_db: CacheEngine,
) -> None:
@@ -476,3 +600,5 @@ def test_query_track_and_stats_return_expected_aggregates(
assert stats["expired"] == 0
assert stats["by_status"][CacheStatus.SUCCESS_SYNCED.value] == 1
assert stats["by_status"][CacheStatus.SUCCESS_UNSYNCED.value] == 1
assert stats["by_slot"][SLOT_SYNCED] == 1
assert stats["by_slot"][SLOT_UNSYNCED] == 1
+44 -18
View File
@@ -2,9 +2,9 @@ from __future__ import annotations
import asyncio
from unittest.mock import patch
import pytest
from lrx_cli.config import HIGH_CONFIDENCE
from lrx_cli.cache import SLOT_UNSYNCED
from lrx_cli.core import LrcManager
from lrx_cli.fetchers.base import BaseFetcher, FetchResult
from lrx_cli.lrc import LRCData
@@ -137,7 +137,7 @@ def test_trusted_synced_cancels_sibling(tmp_path):
assert result.source == "fast"
def test_best_confidence_within_group(tmp_path):
def test_allow_unsynced_true_picks_highest_confidence_unsynced(tmp_path):
"""When allow_unsynced=True and no trusted synced result, highest-confidence unsynced is returned."""
low = MockFetcher("low", _fr(unsynced=_unsynced("low", confidence=40.0)))
high = MockFetcher("high", _fr(unsynced=_unsynced("high", confidence=70.0)))
@@ -148,6 +148,22 @@ def test_best_confidence_within_group(tmp_path):
assert result.source == "high"
def test_equal_confidence_prefers_synced_when_unsynced_allowed(tmp_path):
    """Tie on confidence should still prefer synced over unsynced."""
    dual_result = _fr(
        synced=_synced("dual", confidence=70.0),
        unsynced=_unsynced("dual", confidence=70.0),
    )
    dual = MockFetcher("dual", dual_result)
    manager = make_manager(tmp_path)

    with patch("lrx_cli.core.build_plan", return_value=[[dual]]):
        outcome = manager.fetch_for_track(_track(), allow_unsynced=True)

    assert outcome is not None
    assert outcome.status == CacheStatus.SUCCESS_SYNCED
def test_unsynced_only_returns_none_when_not_allowed(tmp_path):
"""When allow_unsynced=False, unsynced-only pipeline result must be rejected."""
only_unsynced = MockFetcher(
@@ -210,22 +226,10 @@ def test_cache_trusted_synced_no_fetch(tmp_path):
assert result.status == CacheStatus.SUCCESS_SYNCED
@pytest.mark.xfail(
strict=True,
reason=(
"Known limitation: cache stores only one positive slot; after an allow_unsynced=True "
"request caches unsynced, later allow_unsynced=False request does not re-fetch synced"
),
)
def test_xfail_cached_unsynced_should_not_block_live_synced_when_unsynced_disallowed(
def test_cached_slots_support_strategy_switch_without_refetch(
tmp_path,
):
"""Known gap reproduced with strategy switch across two requests.
1) Fetcher returns both synced and unsynced.
2) allow_unsynced=True picks/caches higher-confidence unsynced.
3) allow_unsynced=False should re-fetch synced, but currently short-circuits on cache.
"""
"""When both slots are cached, strategy switch should reuse cache without re-fetch."""
fetcher = MockFetcher(
"src",
_fr(
@@ -244,10 +248,32 @@ def test_xfail_cached_unsynced_should_not_block_live_synced_when_unsynced_disall
fetcher.called = False
# Second request: stricter strategy should recover synced via re-fetch.
# Second request: stricter strategy should use synced cache slot directly.
with patch("lrx_cli.core.build_plan", return_value=[[fetcher]]):
second = manager.fetch_for_track(track, allow_unsynced=False)
assert fetcher.called
assert not fetcher.called
assert second is not None
assert second.status == CacheStatus.SUCCESS_SYNCED
def test_unsynced_cache_only_still_fetches_when_unsynced_disallowed(tmp_path):
    """If only unsynced cache slot exists, allow_unsynced=False must still fetch synced."""
    fetcher = MockFetcher("src", _fr(synced=_synced("src", confidence=88.0)))
    manager = make_manager(tmp_path)
    track = _track()

    # Pre-populate ONLY the unsynced slot; the synced slot stays empty.
    manager.cache.set(
        track,
        "src",
        _unsynced("src", confidence=95.0),
        ttl_seconds=3600,
        positive_kind=SLOT_UNSYNCED,
    )

    with patch("lrx_cli.core.build_plan", return_value=[[fetcher]]):
        result = manager.fetch_for_track(track, allow_unsynced=False)

    # The cache could not satisfy the synced requirement, so a live fetch ran.
    assert fetcher.called
    assert result is not None
    assert result.status == CacheStatus.SUCCESS_SYNCED