refactor: large-scale rework of selection & fetchers

This commit is contained in:
2026-04-06 13:37:51 +02:00
parent 69b7f5c60c
commit 0c85af534e
23 changed files with 794 additions and 364 deletions
+80 -2
View File
@@ -66,6 +66,63 @@ def test_generate_key_raises_when_metadata_missing() -> None:
)
def test_migrate_adds_confidence_version_and_boosts_unsynced(tmp_path: Path) -> None:
    """Legacy cache without confidence_version is migrated in-place.
    Expected behavior:
    - add confidence_version column
    - boost SUCCESS_UNSYNCED confidence by +10 with cap at 100
    - keep SUCCESS_SYNCED confidence unchanged
    """
    legacy_db = tmp_path / "legacy-cache.db"

    # Seed a pre-migration schema: same columns, minus confidence_version.
    with sqlite3.connect(legacy_db) as seed:
        seed.execute(
            """
            CREATE TABLE cache (
                key TEXT PRIMARY KEY,
                source TEXT NOT NULL,
                status TEXT NOT NULL,
                lyrics TEXT,
                created_at INTEGER NOT NULL,
                expires_at INTEGER,
                artist TEXT,
                title TEXT,
                album TEXT,
                length INTEGER,
                confidence REAL
            )
            """
        )
        seed.execute(
            """
            INSERT INTO cache
            (key, source, status, lyrics, created_at, expires_at, artist, title, album, length, confidence)
            VALUES
            ('u1', 's1', 'SUCCESS_UNSYNCED', 'u1', 1, NULL, 'A', 'T', 'AL', 180000, 85.0),
            ('u2', 's2', 'SUCCESS_UNSYNCED', 'u2', 1, NULL, 'A', 'T', 'AL', 180000, 98.0),
            ('s1', 's3', 'SUCCESS_SYNCED', 's1', 1, NULL, 'A', 'T', 'AL', 180000, 70.0)
            """
        )
        seed.commit()

    # Constructing the engine over a legacy DB triggers the in-place migration.
    CacheEngine(str(legacy_db))

    with sqlite3.connect(legacy_db) as check:
        columns = {row[1] for row in check.execute("PRAGMA table_info(cache)").fetchall()}
        migrated = check.execute(
            "SELECT key, status, confidence, confidence_version FROM cache ORDER BY key"
        ).fetchall()

    assert "confidence_version" in columns
    # Map each key to its (status, confidence, version) tuple for assertions.
    by_key = {row[0]: row[1:] for row in migrated}
    assert by_key["u1"] == ("SUCCESS_UNSYNCED", 95.0, 1)
    assert by_key["u2"] == ("SUCCESS_UNSYNCED", 100.0, 1)
    assert by_key["s1"] == ("SUCCESS_SYNCED", 70.0, 1)
def test_set_and_get_roundtrip_with_ttl(
monkeypatch: pytest.MonkeyPatch, cache_db: CacheEngine
) -> None:
@@ -218,7 +275,7 @@ def test_find_best_positive_uses_exact_match_and_prefers_synced(
cache_db.set(track, "s1", _result(CacheStatus.SUCCESS_UNSYNCED, "u", "s1"))
cache_db.set(track, "s2", _result(CacheStatus.SUCCESS_SYNCED, "s", "s2"))
best = cache_db.find_best_positive(track)
best = cache_db.find_best_positive(track, CacheStatus.SUCCESS_SYNCED)
assert best is not None
assert best.status is CacheStatus.SUCCESS_SYNCED
@@ -227,6 +284,26 @@ def test_find_best_positive_uses_exact_match_and_prefers_synced(
assert best.source == "cache-search"
def test_find_best_positive_returns_status_specific_results(
    cache_db: CacheEngine,
) -> None:
    """find_best_positive honors the requested status filter per lookup."""
    track = _track(artist="Artist", title="Song", album="Album")
    cache_db.set(track, "u-high", _result(CacheStatus.SUCCESS_UNSYNCED, "u", "u-high"))
    cache_db.set(track, "s-low", _result(CacheStatus.SUCCESS_SYNCED, "s", "s-low"))
    cache_db.update_confidence(track, 95.0, "u-high")
    cache_db.update_confidence(track, 70.0, "s-low")

    # Even though the unsynced entry has the higher confidence, each lookup
    # must stay inside its requested status bucket.
    for wanted, expected_text in (
        (CacheStatus.SUCCESS_SYNCED, "s"),
        (CacheStatus.SUCCESS_UNSYNCED, "u"),
    ):
        hit = cache_db.find_best_positive(track, wanted)
        assert hit is not None
        assert hit.status is wanted
        assert str(hit.lyrics) == expected_text
def test_search_by_meta_fuzzy_rules_and_duration_sorting(cache_db: CacheEngine) -> None:
# Same logical title/artist after normalization, different length quality.
base = _track(
@@ -296,6 +373,7 @@ def test_search_by_meta_fuzzy_rules_and_duration_sorting(cache_db: CacheEngine)
sources = [r["source"] for r in rows]
assert "negative" not in sources
assert "far-len" not in sources
assert "close-unsynced" in sources
# Sorted by duration diff, then confidence for equal diff.
assert sources[0] == "seed"
assert sources[1] == "close-synced"
@@ -314,7 +392,7 @@ def test_update_confidence_targets_specific_source(cache_db: CacheEngine) -> Non
assert updated == 1
rows = {r["source"]: r for r in cache_db.query_track(track)}
assert rows["s1"]["confidence"] == 75.0
assert rows["s2"]["confidence"] == 90.0 # unchanged (unsynced default)
assert rows["s2"]["confidence"] == 100.0 # unchanged default
def test_update_confidence_returns_zero_for_missing_source(
+12 -5
View File
@@ -37,9 +37,14 @@ def lrc_manager(tmp_path: Path) -> LrcManager:
def _fetch_and_assert(
lrc_manager: LrcManager, method: FetcherMethodType, expect_fail: bool = False
lrc_manager: LrcManager,
method: FetcherMethodType,
expect_fail: bool = False,
bypass_cache: bool = True,
) -> None:
result = lrc_manager.fetch_for_track(SAMPLE_SPOTIFY_TRACK, force_method=method)
result = lrc_manager.fetch_for_track(
SAMPLE_SPOTIFY_TRACK, force_method=method, bypass_cache=bypass_cache
)
if expect_fail:
assert result is None
else:
@@ -48,7 +53,7 @@ def _fetch_and_assert(
def test_cache_search_fetcher_without_cache(lrc_manager: LrcManager):
_fetch_and_assert(lrc_manager, "cache-search", expect_fail=True)
_fetch_and_assert(lrc_manager, "cache-search", expect_fail=True, bypass_cache=False)
@pytest.mark.parametrize(
@@ -68,7 +73,9 @@ def test_cache_search_fetcher_with_fuzzy_metadata(
expected_lrc = "[00:00.01]lyrics"
lrc_manager.manual_insert(SAMPLE_SPOTIFY_TRACK, expected_lrc)
result = lrc_manager.fetch_for_track(query_track, force_method="cache-search")
result = lrc_manager.fetch_for_track(
query_track, force_method="cache-search", bypass_cache=False
)
assert result is not None
assert result.lyrics is not None
@@ -84,7 +91,7 @@ def test_cache_search_fetcher_prefer_better_match(lrc_manager: LrcManager):
)
result = lrc_manager.fetch_for_track(
SAMPLE_SPOTIFY_TRACK, force_method="cache-search"
SAMPLE_SPOTIFY_TRACK, force_method="cache-search", bypass_cache=False
)
assert result is not None
+83 -19
View File
@@ -2,10 +2,11 @@ from __future__ import annotations
import asyncio
from unittest.mock import patch
import pytest
from lrx_cli.config import HIGH_CONFIDENCE
from lrx_cli.core import LrcManager
from lrx_cli.fetchers.base import BaseFetcher
from lrx_cli.fetchers.base import BaseFetcher, FetchResult
from lrx_cli.lrc import LRCData
from lrx_cli.models import CacheStatus, LyricResult, TrackMeta
@@ -41,8 +42,15 @@ def _not_found() -> LyricResult:
return LyricResult(status=CacheStatus.NOT_FOUND)
def _fr(
    synced: LyricResult | None = None,
    unsynced: LyricResult | None = None,
) -> FetchResult:
    """Build a FetchResult with the given slots; omitted slots stay None."""
    fetch_result = FetchResult(synced=synced, unsynced=unsynced)
    return fetch_result
class MockFetcher(BaseFetcher):
def __init__(self, name: str, result: LyricResult | None, delay: float = 0.0):
def __init__(self, name: str, result: FetchResult, delay: float = 0.0):
self._name = name
self._result = result
self._delay = delay
@@ -56,9 +64,7 @@ class MockFetcher(BaseFetcher):
def is_available(self, track: TrackMeta) -> bool:
return True
async def fetch(
self, track: TrackMeta, bypass_cache: bool = False
) -> LyricResult | None:
async def fetch(self, track: TrackMeta, bypass_cache: bool = False) -> FetchResult:
self.called = True
try:
if self._delay:
@@ -78,8 +84,8 @@ def make_manager(tmp_path) -> LrcManager:
def test_unsynced_does_not_stop_next_group(tmp_path):
"""Unsynced result should not stop the pipeline — next group must still run."""
a = MockFetcher("a", _unsynced("a"))
b = MockFetcher("b", _synced("b"))
a = MockFetcher("a", _fr(unsynced=_unsynced("a")))
b = MockFetcher("b", _fr(synced=_synced("b")))
manager = make_manager(tmp_path)
with patch("lrx_cli.core.build_plan", return_value=[[a], [b]]):
result = manager.fetch_for_track(_track())
@@ -90,8 +96,8 @@ def test_unsynced_does_not_stop_next_group(tmp_path):
def test_trusted_synced_stops_next_group(tmp_path):
"""Trusted synced from group1 must prevent group2 from running."""
a = MockFetcher("a", _synced("a"))
b = MockFetcher("b", _synced("b"))
a = MockFetcher("a", _fr(synced=_synced("a")))
b = MockFetcher("b", _fr(synced=_synced("b")))
manager = make_manager(tmp_path)
with patch("lrx_cli.core.build_plan", return_value=[[a], [b]]):
result = manager.fetch_for_track(_track())
@@ -102,8 +108,8 @@ def test_trusted_synced_stops_next_group(tmp_path):
def test_negative_continues_next_group(tmp_path):
"""NOT_FOUND from group1 must cause group2 to be tried."""
a = MockFetcher("a", _not_found())
b = MockFetcher("b", _synced("b"))
a = MockFetcher("a", _fr(synced=_not_found(), unsynced=_not_found()))
b = MockFetcher("b", _fr(synced=_synced("b")))
manager = make_manager(tmp_path)
with patch("lrx_cli.core.build_plan", return_value=[[a], [b]]):
result = manager.fetch_for_track(_track())
@@ -119,8 +125,8 @@ def test_negative_continues_next_group(tmp_path):
def test_trusted_synced_cancels_sibling(tmp_path):
"""When a fast fetcher returns trusted synced, the slow sibling must be cancelled.
If cancellation is broken this test will block for 10 seconds."""
fast = MockFetcher("fast", _synced("fast"))
slow = MockFetcher("slow", _synced("slow"), delay=10.0)
fast = MockFetcher("fast", _fr(synced=_synced("fast")))
slow = MockFetcher("slow", _fr(synced=_synced("slow")), delay=10.0)
manager = make_manager(tmp_path)
with patch("lrx_cli.core.build_plan", return_value=[[fast, slow]]):
result = manager.fetch_for_track(_track())
@@ -132,22 +138,56 @@ def test_trusted_synced_cancels_sibling(tmp_path):
def test_best_confidence_within_group(tmp_path):
"""When no trusted synced result, highest-confidence result from group is returned."""
low = MockFetcher("low", _unsynced("low", confidence=40.0))
high = MockFetcher("high", _unsynced("high", confidence=70.0))
"""When allow_unsynced=True and no trusted synced result, highest-confidence unsynced is returned."""
low = MockFetcher("low", _fr(unsynced=_unsynced("low", confidence=40.0)))
high = MockFetcher("high", _fr(unsynced=_unsynced("high", confidence=70.0)))
manager = make_manager(tmp_path)
with patch("lrx_cli.core.build_plan", return_value=[[low, high]]):
result = manager.fetch_for_track(_track())
result = manager.fetch_for_track(_track(), allow_unsynced=True)
assert result is not None
assert result.source == "high"
def test_unsynced_only_returns_none_when_not_allowed(tmp_path):
    """When allow_unsynced=False, unsynced-only pipeline result must be rejected."""
    # High confidence on purpose: rejection must be status-based, not score-based.
    only_unsynced = MockFetcher("u", _fr(unsynced=_unsynced("u", confidence=95.0)))
    manager = make_manager(tmp_path)
    plan = [[only_unsynced]]
    with patch("lrx_cli.core.build_plan", return_value=plan):
        outcome = manager.fetch_for_track(_track(), allow_unsynced=False)
    assert outcome is None
def test_allow_unsynced_flag_controls_return_type(tmp_path):
    """With both slots available, allow_unsynced controls whether unsynced can be returned."""
    dual = MockFetcher(
        "dual",
        _fr(
            synced=_synced("dual", confidence=55.0),
            unsynced=_unsynced("dual", confidence=95.0),
        ),
    )
    manager = make_manager(tmp_path)

    # Run the disallow case first, then the allow case, on the same manager —
    # this mirrors the original call order exactly.
    for allow, expected_status in (
        (False, CacheStatus.SUCCESS_SYNCED),
        (True, CacheStatus.SUCCESS_UNSYNCED),
    ):
        with patch("lrx_cli.core.build_plan", return_value=[[dual]]):
            outcome = manager.fetch_for_track(_track(), allow_unsynced=allow)
        assert outcome is not None
        assert outcome.status == expected_status
# Cache interaction
def test_cache_negative_skips_fetch(tmp_path):
"""A cached NOT_FOUND entry must prevent the fetcher from being called."""
fetcher = MockFetcher("src", _synced("src"))
fetcher = MockFetcher("src", _fr(synced=_synced("src")))
manager = make_manager(tmp_path)
track = _track()
manager.cache.set(track, "src", _not_found(), ttl_seconds=3600)
@@ -159,7 +199,7 @@ def test_cache_negative_skips_fetch(tmp_path):
def test_cache_trusted_synced_no_fetch(tmp_path):
"""A trusted synced cache hit must be returned without calling the fetcher."""
fetcher = MockFetcher("src", None)
fetcher = MockFetcher("src", _fr())
manager = make_manager(tmp_path)
track = _track()
manager.cache.set(track, "src", _synced("src"), ttl_seconds=3600)
@@ -168,3 +208,27 @@ def test_cache_trusted_synced_no_fetch(tmp_path):
assert not fetcher.called
assert result is not None
assert result.status == CacheStatus.SUCCESS_SYNCED
@pytest.mark.xfail(
    strict=True,
    reason=(
        "Known limitation: cached unsynced currently blocks live fetch, "
        "so allow_unsynced=False may return None instead of fresh synced"
    ),
)
def test_xfail_cached_unsynced_should_not_block_live_synced_when_unsynced_disallowed(
    tmp_path,
):
    """Known gap: cached unsynced prevents re-fetch, so this expected behavior is xfailed."""
    live = MockFetcher("src", _fr(synced=_synced("src", confidence=90.0)))
    manager = make_manager(tmp_path)
    track = _track()
    # Pre-seed the cache with a lower-confidence unsynced hit for the same source.
    manager.cache.set(track, "src", _unsynced("src", confidence=85.0), ttl_seconds=3600)

    with patch("lrx_cli.core.build_plan", return_value=[[live]]):
        outcome = manager.fetch_for_track(track, allow_unsynced=False)

    # Desired (currently unmet) behavior: the fetcher runs and fresh synced wins.
    assert live.called
    assert outcome is not None
    assert outcome.status == CacheStatus.SUCCESS_SYNCED
+7 -7
View File
@@ -75,13 +75,13 @@ def test_score_missing_one_side_gives_zero_for_field() -> None:
def test_score_synced_bonus() -> None:
"""Synced adds 10 points."""
"""Synced state does not affect metadata score anymore."""
base = SearchCandidate(item="x", title="My Love", is_synced=False)
synced = SearchCandidate(item="x", title="My Love", is_synced=True)
diff = _score_candidate(synced, "My Love", None, None, None) - _score_candidate(
base, "My Love", None, None, None
)
assert diff == 10.0
assert diff == 0.0
def test_score_duration_linear_decay() -> None:
@@ -95,11 +95,11 @@ def test_score_duration_linear_decay() -> None:
at_tol = SearchCandidate(item="x", duration_ms=232000.0 + 3000.0)
score_edge = _score_candidate(at_tol, None, None, None, 232000)
# Only duration is comparable → rescaled to fill 0-90
# exact=90, half=45, edge=0
assert score_exact == 90.0
assert score_half == 45.0
assert score_edge == 0.0
# Only duration is comparable → metadata spans 0-90, plus a constant baseline +10
# exact=100, half=55, edge=10
assert score_exact == 100.0
assert score_half == 55.0
assert score_edge == 10.0
def test_duration_hard_filter_rejects_all_mismatched() -> None: