refactor: large-scale rework of selection & fetchers

This commit is contained in:
2026-04-06 13:37:51 +02:00
parent 69b7f5c60c
commit 0c85af534e
23 changed files with 794 additions and 364 deletions
+80 -2
View File
@@ -66,6 +66,63 @@ def test_generate_key_raises_when_metadata_missing() -> None:
)
def test_migrate_adds_confidence_version_and_boosts_unsynced(tmp_path: Path) -> None:
    """Legacy cache without confidence_version is migrated in-place.
    Expected behavior:
    - add confidence_version column
    - boost SUCCESS_UNSYNCED confidence by +10 with cap at 100
    - keep SUCCESS_SYNCED confidence unchanged
    """
    legacy_db = tmp_path / "legacy-cache.db"

    # Seed a pre-migration schema: same columns, minus confidence_version.
    with sqlite3.connect(legacy_db) as seed:
        seed.execute(
            """
            CREATE TABLE cache (
                key TEXT PRIMARY KEY,
                source TEXT NOT NULL,
                status TEXT NOT NULL,
                lyrics TEXT,
                created_at INTEGER NOT NULL,
                expires_at INTEGER,
                artist TEXT,
                title TEXT,
                album TEXT,
                length INTEGER,
                confidence REAL
            )
            """
        )
        seed.execute(
            """
            INSERT INTO cache
            (key, source, status, lyrics, created_at, expires_at, artist, title, album, length, confidence)
            VALUES
            ('u1', 's1', 'SUCCESS_UNSYNCED', 'u1', 1, NULL, 'A', 'T', 'AL', 180000, 85.0),
            ('u2', 's2', 'SUCCESS_UNSYNCED', 'u2', 1, NULL, 'A', 'T', 'AL', 180000, 98.0),
            ('s1', 's3', 'SUCCESS_SYNCED', 's1', 1, NULL, 'A', 'T', 'AL', 180000, 70.0)
            """
        )
        seed.commit()

    # Constructing the engine over a legacy DB triggers the in-place migration.
    CacheEngine(str(legacy_db))

    with sqlite3.connect(legacy_db) as check:
        columns = {row[1] for row in check.execute("PRAGMA table_info(cache)").fetchall()}
        migrated = check.execute(
            "SELECT key, status, confidence, confidence_version FROM cache ORDER BY key"
        ).fetchall()

    assert "confidence_version" in columns
    # Map each key to its (status, confidence, version) tuple for assertions.
    by_key = {row[0]: row[1:] for row in migrated}
    assert by_key["u1"] == ("SUCCESS_UNSYNCED", 95.0, 1)
    assert by_key["u2"] == ("SUCCESS_UNSYNCED", 100.0, 1)
    assert by_key["s1"] == ("SUCCESS_SYNCED", 70.0, 1)
def test_set_and_get_roundtrip_with_ttl(
monkeypatch: pytest.MonkeyPatch, cache_db: CacheEngine
) -> None:
@@ -218,7 +275,7 @@ def test_find_best_positive_uses_exact_match_and_prefers_synced(
cache_db.set(track, "s1", _result(CacheStatus.SUCCESS_UNSYNCED, "u", "s1"))
cache_db.set(track, "s2", _result(CacheStatus.SUCCESS_SYNCED, "s", "s2"))
best = cache_db.find_best_positive(track)
best = cache_db.find_best_positive(track, CacheStatus.SUCCESS_SYNCED)
assert best is not None
assert best.status is CacheStatus.SUCCESS_SYNCED
@@ -227,6 +284,26 @@ def test_find_best_positive_uses_exact_match_and_prefers_synced(
assert best.source == "cache-search"
def test_find_best_positive_returns_status_specific_results(
    cache_db: CacheEngine,
) -> None:
    """find_best_positive honors the requested status filter per lookup."""
    track = _track(artist="Artist", title="Song", album="Album")
    cache_db.set(track, "u-high", _result(CacheStatus.SUCCESS_UNSYNCED, "u", "u-high"))
    cache_db.set(track, "s-low", _result(CacheStatus.SUCCESS_SYNCED, "s", "s-low"))
    cache_db.update_confidence(track, 95.0, "u-high")
    cache_db.update_confidence(track, 70.0, "s-low")

    # Even though the unsynced entry has the higher confidence, each lookup
    # must stay inside its requested status bucket.
    for wanted, expected_text in (
        (CacheStatus.SUCCESS_SYNCED, "s"),
        (CacheStatus.SUCCESS_UNSYNCED, "u"),
    ):
        hit = cache_db.find_best_positive(track, wanted)
        assert hit is not None
        assert hit.status is wanted
        assert str(hit.lyrics) == expected_text
def test_search_by_meta_fuzzy_rules_and_duration_sorting(cache_db: CacheEngine) -> None:
# Same logical title/artist after normalization, different length quality.
base = _track(
@@ -296,6 +373,7 @@ def test_search_by_meta_fuzzy_rules_and_duration_sorting(cache_db: CacheEngine)
sources = [r["source"] for r in rows]
assert "negative" not in sources
assert "far-len" not in sources
assert "close-unsynced" in sources
# Sorted by duration diff, then confidence for equal diff.
assert sources[0] == "seed"
assert sources[1] == "close-synced"
@@ -314,7 +392,7 @@ def test_update_confidence_targets_specific_source(cache_db: CacheEngine) -> Non
assert updated == 1
rows = {r["source"]: r for r in cache_db.query_track(track)}
assert rows["s1"]["confidence"] == 75.0
assert rows["s2"]["confidence"] == 90.0 # unchanged (unsynced default)
assert rows["s2"]["confidence"] == 100.0 # unchanged default
def test_update_confidence_returns_zero_for_missing_source(
+12 -5
View File
@@ -37,9 +37,14 @@ def lrc_manager(tmp_path: Path) -> LrcManager:
def _fetch_and_assert(
lrc_manager: LrcManager, method: FetcherMethodType, expect_fail: bool = False
lrc_manager: LrcManager,
method: FetcherMethodType,
expect_fail: bool = False,
bypass_cache: bool = True,
) -> None:
result = lrc_manager.fetch_for_track(SAMPLE_SPOTIFY_TRACK, force_method=method)
result = lrc_manager.fetch_for_track(
SAMPLE_SPOTIFY_TRACK, force_method=method, bypass_cache=bypass_cache
)
if expect_fail:
assert result is None
else:
@@ -48,7 +53,7 @@ def _fetch_and_assert(
def test_cache_search_fetcher_without_cache(lrc_manager: LrcManager):
_fetch_and_assert(lrc_manager, "cache-search", expect_fail=True)
_fetch_and_assert(lrc_manager, "cache-search", expect_fail=True, bypass_cache=False)
@pytest.mark.parametrize(
@@ -68,7 +73,9 @@ def test_cache_search_fetcher_with_fuzzy_metadata(
expected_lrc = "[00:00.01]lyrics"
lrc_manager.manual_insert(SAMPLE_SPOTIFY_TRACK, expected_lrc)
result = lrc_manager.fetch_for_track(query_track, force_method="cache-search")
result = lrc_manager.fetch_for_track(
query_track, force_method="cache-search", bypass_cache=False
)
assert result is not None
assert result.lyrics is not None
@@ -84,7 +91,7 @@ def test_cache_search_fetcher_prefer_better_match(lrc_manager: LrcManager):
)
result = lrc_manager.fetch_for_track(
SAMPLE_SPOTIFY_TRACK, force_method="cache-search"
SAMPLE_SPOTIFY_TRACK, force_method="cache-search", bypass_cache=False
)
assert result is not None
+83 -19
View File
@@ -2,10 +2,11 @@ from __future__ import annotations
import asyncio
from unittest.mock import patch
import pytest
from lrx_cli.config import HIGH_CONFIDENCE
from lrx_cli.core import LrcManager
from lrx_cli.fetchers.base import BaseFetcher
from lrx_cli.fetchers.base import BaseFetcher, FetchResult
from lrx_cli.lrc import LRCData
from lrx_cli.models import CacheStatus, LyricResult, TrackMeta
@@ -41,8 +42,15 @@ def _not_found() -> LyricResult:
return LyricResult(status=CacheStatus.NOT_FOUND)
def _fr(
    synced: LyricResult | None = None,
    unsynced: LyricResult | None = None,
) -> FetchResult:
    """Build a FetchResult with the given slots; omitted slots stay None."""
    fetch_result = FetchResult(synced=synced, unsynced=unsynced)
    return fetch_result
class MockFetcher(BaseFetcher):
def __init__(self, name: str, result: LyricResult | None, delay: float = 0.0):
def __init__(self, name: str, result: FetchResult, delay: float = 0.0):
self._name = name
self._result = result
self._delay = delay
@@ -56,9 +64,7 @@ class MockFetcher(BaseFetcher):
def is_available(self, track: TrackMeta) -> bool:
return True
async def fetch(
self, track: TrackMeta, bypass_cache: bool = False
) -> LyricResult | None:
async def fetch(self, track: TrackMeta, bypass_cache: bool = False) -> FetchResult:
self.called = True
try:
if self._delay:
@@ -78,8 +84,8 @@ def make_manager(tmp_path) -> LrcManager:
def test_unsynced_does_not_stop_next_group(tmp_path):
"""Unsynced result should not stop the pipeline — next group must still run."""
a = MockFetcher("a", _unsynced("a"))
b = MockFetcher("b", _synced("b"))
a = MockFetcher("a", _fr(unsynced=_unsynced("a")))
b = MockFetcher("b", _fr(synced=_synced("b")))
manager = make_manager(tmp_path)
with patch("lrx_cli.core.build_plan", return_value=[[a], [b]]):
result = manager.fetch_for_track(_track())
@@ -90,8 +96,8 @@ def test_unsynced_does_not_stop_next_group(tmp_path):
def test_trusted_synced_stops_next_group(tmp_path):
"""Trusted synced from group1 must prevent group2 from running."""
a = MockFetcher("a", _synced("a"))
b = MockFetcher("b", _synced("b"))
a = MockFetcher("a", _fr(synced=_synced("a")))
b = MockFetcher("b", _fr(synced=_synced("b")))
manager = make_manager(tmp_path)
with patch("lrx_cli.core.build_plan", return_value=[[a], [b]]):
result = manager.fetch_for_track(_track())
@@ -102,8 +108,8 @@ def test_trusted_synced_stops_next_group(tmp_path):
def test_negative_continues_next_group(tmp_path):
"""NOT_FOUND from group1 must cause group2 to be tried."""
a = MockFetcher("a", _not_found())
b = MockFetcher("b", _synced("b"))
a = MockFetcher("a", _fr(synced=_not_found(), unsynced=_not_found()))
b = MockFetcher("b", _fr(synced=_synced("b")))
manager = make_manager(tmp_path)
with patch("lrx_cli.core.build_plan", return_value=[[a], [b]]):
result = manager.fetch_for_track(_track())
@@ -119,8 +125,8 @@ def test_negative_continues_next_group(tmp_path):
def test_trusted_synced_cancels_sibling(tmp_path):
"""When a fast fetcher returns trusted synced, the slow sibling must be cancelled.
If cancellation is broken this test will block for 10 seconds."""
fast = MockFetcher("fast", _synced("fast"))
slow = MockFetcher("slow", _synced("slow"), delay=10.0)
fast = MockFetcher("fast", _fr(synced=_synced("fast")))
slow = MockFetcher("slow", _fr(synced=_synced("slow")), delay=10.0)
manager = make_manager(tmp_path)
with patch("lrx_cli.core.build_plan", return_value=[[fast, slow]]):
result = manager.fetch_for_track(_track())
@@ -132,22 +138,56 @@ def test_trusted_synced_cancels_sibling(tmp_path):
def test_best_confidence_within_group(tmp_path):
"""When no trusted synced result, highest-confidence result from group is returned."""
low = MockFetcher("low", _unsynced("low", confidence=40.0))
high = MockFetcher("high", _unsynced("high", confidence=70.0))
"""When allow_unsynced=True and no trusted synced result, highest-confidence unsynced is returned."""
low = MockFetcher("low", _fr(unsynced=_unsynced("low", confidence=40.0)))
high = MockFetcher("high", _fr(unsynced=_unsynced("high", confidence=70.0)))
manager = make_manager(tmp_path)
with patch("lrx_cli.core.build_plan", return_value=[[low, high]]):
result = manager.fetch_for_track(_track())
result = manager.fetch_for_track(_track(), allow_unsynced=True)
assert result is not None
assert result.source == "high"
def test_unsynced_only_returns_none_when_not_allowed(tmp_path):
    """When allow_unsynced=False, unsynced-only pipeline result must be rejected."""
    # High confidence on purpose: rejection must be status-based, not score-based.
    only_unsynced = MockFetcher("u", _fr(unsynced=_unsynced("u", confidence=95.0)))
    manager = make_manager(tmp_path)
    plan = [[only_unsynced]]
    with patch("lrx_cli.core.build_plan", return_value=plan):
        outcome = manager.fetch_for_track(_track(), allow_unsynced=False)
    assert outcome is None
def test_allow_unsynced_flag_controls_return_type(tmp_path):
    """With both slots available, allow_unsynced controls whether unsynced can be returned."""
    dual = MockFetcher(
        "dual",
        _fr(
            synced=_synced("dual", confidence=55.0),
            unsynced=_unsynced("dual", confidence=95.0),
        ),
    )
    manager = make_manager(tmp_path)

    # Run the disallow case first, then the allow case, on the same manager —
    # this mirrors the original call order exactly.
    for allow, expected_status in (
        (False, CacheStatus.SUCCESS_SYNCED),
        (True, CacheStatus.SUCCESS_UNSYNCED),
    ):
        with patch("lrx_cli.core.build_plan", return_value=[[dual]]):
            outcome = manager.fetch_for_track(_track(), allow_unsynced=allow)
        assert outcome is not None
        assert outcome.status == expected_status
# Cache interaction
def test_cache_negative_skips_fetch(tmp_path):
"""A cached NOT_FOUND entry must prevent the fetcher from being called."""
fetcher = MockFetcher("src", _synced("src"))
fetcher = MockFetcher("src", _fr(synced=_synced("src")))
manager = make_manager(tmp_path)
track = _track()
manager.cache.set(track, "src", _not_found(), ttl_seconds=3600)
@@ -159,7 +199,7 @@ def test_cache_negative_skips_fetch(tmp_path):
def test_cache_trusted_synced_no_fetch(tmp_path):
"""A trusted synced cache hit must be returned without calling the fetcher."""
fetcher = MockFetcher("src", None)
fetcher = MockFetcher("src", _fr())
manager = make_manager(tmp_path)
track = _track()
manager.cache.set(track, "src", _synced("src"), ttl_seconds=3600)
@@ -168,3 +208,27 @@ def test_cache_trusted_synced_no_fetch(tmp_path):
assert not fetcher.called
assert result is not None
assert result.status == CacheStatus.SUCCESS_SYNCED
@pytest.mark.xfail(
    strict=True,
    reason=(
        "Known limitation: cached unsynced currently blocks live fetch, "
        "so allow_unsynced=False may return None instead of fresh synced"
    ),
)
def test_xfail_cached_unsynced_should_not_block_live_synced_when_unsynced_disallowed(
    tmp_path,
):
    """Known gap: cached unsynced prevents re-fetch, so this expected behavior is xfailed."""
    live = MockFetcher("src", _fr(synced=_synced("src", confidence=90.0)))
    manager = make_manager(tmp_path)
    track = _track()
    # Pre-seed the cache with a lower-confidence unsynced hit for the same source.
    manager.cache.set(track, "src", _unsynced("src", confidence=85.0), ttl_seconds=3600)

    with patch("lrx_cli.core.build_plan", return_value=[[live]]):
        outcome = manager.fetch_for_track(track, allow_unsynced=False)

    # Desired (currently unmet) behavior: the fetcher runs and fresh synced wins.
    assert live.called
    assert outcome is not None
    assert outcome.status == CacheStatus.SUCCESS_SYNCED
+7 -7
View File
@@ -75,13 +75,13 @@ def test_score_missing_one_side_gives_zero_for_field() -> None:
def test_score_synced_bonus() -> None:
"""Synced adds 10 points."""
"""Synced state does not affect metadata score anymore."""
base = SearchCandidate(item="x", title="My Love", is_synced=False)
synced = SearchCandidate(item="x", title="My Love", is_synced=True)
diff = _score_candidate(synced, "My Love", None, None, None) - _score_candidate(
base, "My Love", None, None, None
)
assert diff == 10.0
assert diff == 0.0
def test_score_duration_linear_decay() -> None:
@@ -95,11 +95,11 @@ def test_score_duration_linear_decay() -> None:
at_tol = SearchCandidate(item="x", duration_ms=232000.0 + 3000.0)
score_edge = _score_candidate(at_tol, None, None, None, 232000)
# Only duration is comparable → rescaled to fill 0-90
# exact=90, half=45, edge=0
assert score_exact == 90.0
assert score_half == 45.0
assert score_edge == 0.0
# Only duration is comparable → metadata spans 0-90, plus a constant baseline +10
# exact=100, half=55, edge=10
assert score_exact == 100.0
assert score_half == 55.0
assert score_edge == 10.0
def test_duration_hard_filter_rejects_all_mismatched() -> None: