From f175eda57ed5787d6a85cddf82a5018416b3c20a Mon Sep 17 00:00:00 2001 From: Uyanide Date: Mon, 6 Apr 2026 23:08:57 +0200 Subject: [PATCH] refactor: change cache schema to slots based --- pyproject.toml | 2 +- src/lrx_cli/cache.py | 357 +++++++++++++++++++++++++++-------------- src/lrx_cli/cli.py | 10 +- src/lrx_cli/config.py | 3 + src/lrx_cli/core.py | 169 +++++++++---------- src/lrx_cli/ranking.py | 69 ++++++++ tests/test_cache.py | 184 +++++++++++++++++---- tests/test_pipeline.py | 62 ++++--- uv.lock | 2 +- 9 files changed, 594 insertions(+), 264 deletions(-) create mode 100644 src/lrx_cli/ranking.py diff --git a/pyproject.toml b/pyproject.toml index 858f53a..aa0d4c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "lrx-cli" -version = "0.6.0" +version = "0.6.1" description = "Fetch line-synced lyrics for your music player." readme = "README.md" requires-python = ">=3.13" diff --git a/src/lrx_cli/cache.py b/src/lrx_cli/cache.py index f26ca58..2fe2029 100644 --- a/src/lrx_cli/cache.py +++ b/src/lrx_cli/cache.py @@ -1,8 +1,8 @@ """ Author: Uyanide pywang0608@foxmail.com Date: 2026-03-25 10:18:03 -Description: SQLite-based lyric cache with per-source storage, TTL expiration, - and lightweight schema migrations (including confidence versioning). +Description: SQLite-based lyric cache with per-source slot rows, TTL expiration, + and schema migrations (confidence versioning + slot migration). """ import json @@ -14,8 +14,18 @@ from loguru import logger from .lrc import LRCData from .normalize import normalize_for_match as _normalize_for_match -from .config import DURATION_TOLERANCE_MS, LEGACY_CONFIDENCE, CONFIDENCE_ALGO_VERSION +from .config import ( + DURATION_TOLERANCE_MS, + LEGACY_CONFIDENCE, + CONFIDENCE_ALGO_VERSION, + SLOT_SYNCED, + SLOT_UNSYNCED, +) from .models import TrackMeta, LyricResult, CacheStatus +from .ranking import is_positive_status, select_best_positive + + +_ALL_SLOTS = (SLOT_SYNCED, SLOT_UNSYNCED) # Fixed WHERE clause for exact track matching. Column names are hardcoded @@ -76,31 +86,8 @@ class CacheEngine: self._init_db() def _init_db(self) -> None: - """Create or migrate cache schema and credentials table. - - Migration notes: - - Add structural columns introduced after initial releases. - - When introducing confidence versioning, rebalance legacy unsynced - confidence (+10, capped at 100) and stamp migrated rows with the - current algorithm version. - """ + """Create cache tables and run one-time slot/cache migrations.""" with sqlite3.connect(self.db_path) as conn: - conn.execute(""" - CREATE TABLE IF NOT EXISTS cache ( - key TEXT PRIMARY KEY, - source TEXT NOT NULL, - status TEXT NOT NULL, - lyrics TEXT, - created_at INTEGER NOT NULL, - expires_at INTEGER, - artist TEXT, - title TEXT, - album TEXT, - length INTEGER, - confidence REAL, - confidence_version INTEGER - ) - """) conn.execute(""" CREATE TABLE IF NOT EXISTS credentials ( name TEXT PRIMARY KEY, @@ -108,23 +95,44 @@ class CacheEngine: expires_at INTEGER ) """) - # Incremental, idempotent migrations for existing databases. + cache_exists = conn.execute( + "SELECT 1 FROM sqlite_master WHERE type='table' AND name='cache'" + ).fetchone() + if not cache_exists: + self._create_cache_table(conn) + conn.commit() + return + cols = {r[1] for r in conn.execute("PRAGMA table_info(cache)").fetchall()} - if "length" not in cols: - conn.execute("ALTER TABLE cache ADD COLUMN length INTEGER") - if "confidence" not in cols: - conn.execute("ALTER TABLE cache ADD COLUMN confidence REAL") + + if "positive_kind" not in cols: + # Normalize legacy shape first so migration SQL can safely read all columns. + if "length" not in cols: + conn.execute("ALTER TABLE cache ADD COLUMN length INTEGER") + if "confidence" not in cols: + conn.execute("ALTER TABLE cache ADD COLUMN confidence REAL") + if "confidence_version" not in cols: + conn.execute( + "ALTER TABLE cache ADD COLUMN confidence_version INTEGER" + ) + self._migrate_legacy_to_slot_cache(conn) + cols = { + r[1] for r in conn.execute("PRAGMA table_info(cache)").fetchall() + } + if "confidence_version" not in cols: conn.execute("ALTER TABLE cache ADD COLUMN confidence_version INTEGER") - # First-time confidence-version migration: boost unsynced rows - # from older scoring assumptions while preserving upper bound. conn.execute( """ UPDATE cache SET confidence = MIN(100.0, COALESCE(confidence, ?) + 10.0) - WHERE status = ? + WHERE status = ? AND positive_kind = ? """, - (LEGACY_CONFIDENCE, CacheStatus.SUCCESS_UNSYNCED.value), + ( + LEGACY_CONFIDENCE, + CacheStatus.SUCCESS_UNSYNCED.value, + SLOT_UNSYNCED, + ), ) conn.execute( "UPDATE cache SET confidence_version = ? WHERE confidence_version IS NULL", @@ -132,92 +140,182 @@ class CacheEngine: ) conn.commit() + def _create_cache_table(self, conn: sqlite3.Connection) -> None: + conn.execute(""" + CREATE TABLE IF NOT EXISTS cache ( + key TEXT NOT NULL, + positive_kind TEXT NOT NULL, + source TEXT NOT NULL, + status TEXT NOT NULL, + lyrics TEXT, + created_at INTEGER NOT NULL, + expires_at INTEGER, + artist TEXT, + title TEXT, + album TEXT, + length INTEGER, + confidence REAL, + confidence_version INTEGER, + PRIMARY KEY (key, positive_kind) + ) + """) + + def _migrate_legacy_to_slot_cache(self, conn: sqlite3.Connection) -> None: + """One-time migration from single-row cache to slot-scoped cache rows.""" + conn.execute("ALTER TABLE cache RENAME TO cache_legacy") + self._create_cache_table(conn) + + positive_statuses = ( + CacheStatus.SUCCESS_SYNCED.value, + CacheStatus.SUCCESS_UNSYNCED.value, + ) + negative_statuses = ( + CacheStatus.NOT_FOUND.value, + CacheStatus.NETWORK_ERROR.value, + ) + + conn.execute( + """ + INSERT INTO cache ( + key, positive_kind, source, status, lyrics, created_at, expires_at, + artist, title, album, length, confidence, confidence_version + ) + SELECT + key, + CASE + WHEN status = ? THEN ? + WHEN status = ? THEN ? + ELSE ? + END, + source, status, lyrics, created_at, expires_at, artist, title, album, length, + CASE + WHEN status = ? THEN MIN(100.0, COALESCE(confidence, ?) + 10.0) + WHEN status = ? THEN COALESCE(confidence, ?) + ELSE COALESCE(confidence, 0.0) + END, + COALESCE(confidence_version, ?) + FROM cache_legacy + WHERE status IN (?, ?) + """, + ( + CacheStatus.SUCCESS_SYNCED.value, + SLOT_SYNCED, + CacheStatus.SUCCESS_UNSYNCED.value, + SLOT_UNSYNCED, + SLOT_SYNCED, + CacheStatus.SUCCESS_UNSYNCED.value, + LEGACY_CONFIDENCE, + CacheStatus.SUCCESS_SYNCED.value, + LEGACY_CONFIDENCE, + CONFIDENCE_ALGO_VERSION, + positive_statuses[0], + positive_statuses[1], + ), + ) + + for slot in _ALL_SLOTS: + conn.execute( + """ + INSERT INTO cache ( + key, positive_kind, source, status, lyrics, created_at, expires_at, + artist, title, album, length, confidence, confidence_version + ) + SELECT + key, ?, source, status, lyrics, created_at, expires_at, artist, title, + album, length, + COALESCE(confidence, 0.0), + COALESCE(confidence_version, ?) + FROM cache_legacy + WHERE status IN (?, ?) + """, + ( + slot, + CONFIDENCE_ALGO_VERSION, + negative_statuses[0], + negative_statuses[1], + ), + ) + + conn.execute("DROP TABLE cache_legacy") + + @staticmethod + def _slot_for_status(status: CacheStatus) -> str: + if status == CacheStatus.SUCCESS_SYNCED: + return SLOT_SYNCED + if status == CacheStatus.SUCCESS_UNSYNCED: + return SLOT_UNSYNCED + raise ValueError(f"Status {status.value} requires explicit slot") + # Read - def get(self, track: TrackMeta, source: str) -> Optional[LyricResult]: - """Look up a cached result for *track* from *source*. - - Returns None on cache miss or expiration. - """ + def get_all(self, track: TrackMeta, source: str) -> list[LyricResult]: + """Return all non-expired cached slot rows for *track*/*source*.""" try: key = _generate_key(track, source) except ValueError: - return None + return [] + now = int(time.time()) with sqlite3.connect(self.db_path) as conn: - row = conn.execute( - "SELECT status, lyrics, source, expires_at, length, confidence FROM cache WHERE key = ?", - (key,), - ).fetchone() + conn.execute( + "DELETE FROM cache WHERE key = ? AND expires_at IS NOT NULL AND expires_at < ?", + (key, now), + ) + conn.commit() + rows = conn.execute( + """ + SELECT status, lyrics, source, expires_at, length, confidence + FROM cache + WHERE key = ? AND (expires_at IS NULL OR expires_at > ?) + ORDER BY positive_kind + """, + (key, now), + ).fetchall() - if not row: + if not rows: logger.debug(f"Cache miss: {source} / {track.display_name()}") - return None + return [] - status_str, lyrics, src, expires_at, cached_length, confidence = row - - # Check TTL expiration - if expires_at and expires_at < int(time.time()): - logger.debug(f"Cache expired: {source} / {track.display_name()}") - conn.execute("DELETE FROM cache WHERE key = ?", (key,)) - conn.commit() - return None - - # Backfill length if the cached row is missing it - if cached_length is None and track.length is not None: + # Backfill missing length for all slot rows under the same key. + if track.length is not None: conn.execute( - "UPDATE cache SET length = ? WHERE key = ?", + "UPDATE cache SET length = ? WHERE key = ? AND length IS NULL", (track.length, key), ) conn.commit() - remaining = expires_at - int(time.time()) if expires_at else None - logger.debug( - f"Cache hit: {source} / {track.display_name()} " - f"[{status_str}, ttl={remaining}s]" - ) + results: list[LyricResult] = [] + for status_str, lyrics, src, expires_at, _cached_length, confidence in rows: + remaining = expires_at - now if expires_at else None status = CacheStatus(status_str) if confidence is None: - if status in (CacheStatus.SUCCESS_SYNCED, CacheStatus.SUCCESS_UNSYNCED): + if is_positive_status(status): confidence = LEGACY_CONFIDENCE else: - confidence = 0.0 # negative statuses: no confidence - - return LyricResult( - status=status, - lyrics=LRCData(lyrics) if lyrics else None, - source=src, - ttl=remaining, - confidence=confidence, + confidence = 0.0 + results.append( + LyricResult( + status=status, + lyrics=LRCData(lyrics) if lyrics else None, + source=src, + ttl=remaining, + confidence=confidence, + ) ) - def get_best(self, track: TrackMeta, sources: list[str]) -> Optional[LyricResult]: - """Return the best cached result across *sources* by confidence. + return results - Skips negative statuses (NOT_FOUND, NETWORK_ERROR) — those are only - consulted per-source to avoid redundant fetches. + def get_best(self, track: TrackMeta, sources: list[str]) -> Optional[LyricResult]: + """Return best positive cached result across sources. + + Negative statuses are ignored by ranking. """ - best: Optional[LyricResult] = None + positives: list[LyricResult] = [] for src in sources: - cached = self.get(track, src) - if not cached: - continue - if cached.status not in ( - CacheStatus.SUCCESS_SYNCED, - CacheStatus.SUCCESS_UNSYNCED, - ): - continue - if best is None: - best = cached - elif cached.confidence > best.confidence: - best = cached - elif ( - cached.confidence == best.confidence - and cached.status == CacheStatus.SUCCESS_SYNCED - and best.status != CacheStatus.SUCCESS_SYNCED - ): - best = cached - return best + rows = self.get_all(track, src) + positives.extend(r for r in rows if is_positive_status(r.status)) + + return select_best_positive(positives, allow_unsynced=True) # Write @@ -227,6 +325,7 @@ class CacheEngine: source: str, result: LyricResult, ttl_seconds: Optional[int] = None, + positive_kind: Optional[str] = None, ) -> None: """Store a lyric result in the cache. @@ -242,27 +341,41 @@ class CacheEngine: now = int(time.time()) expires_at = now + ttl_seconds if ttl_seconds else None + kinds: list[str] + if positive_kind is not None: + kinds = [positive_kind] + elif result.status in ( + CacheStatus.SUCCESS_SYNCED, + CacheStatus.SUCCESS_UNSYNCED, + ): + kinds = [self._slot_for_status(result.status)] + else: + # Convenience for callers that still pass a single negative result. + kinds = [SLOT_SYNCED, SLOT_UNSYNCED] + with sqlite3.connect(self.db_path) as conn: - conn.execute( - """INSERT OR REPLACE INTO cache - (key, source, status, lyrics, created_at, expires_at, - artist, title, album, length, confidence, confidence_version) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", - ( - key, - source, - result.status.value, - str(result.lyrics) if result.lyrics else None, - now, - expires_at, - track.artist, - track.title, - track.album, - track.length, - result.confidence, - CONFIDENCE_ALGO_VERSION, - ), - ) + for kind in kinds: + conn.execute( + """INSERT OR REPLACE INTO cache + (key, positive_kind, source, status, lyrics, created_at, expires_at, + artist, title, album, length, confidence, confidence_version) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", + ( + key, + kind, + source, + result.status.value, + str(result.lyrics) if result.lyrics else None, + now, + expires_at, + track.artist, + track.title, + track.album, + track.length, + result.confidence, + CONFIDENCE_ALGO_VERSION, + ), + ) conn.commit() logger.debug( f"Cached: {source} / {track.display_name()} " @@ -332,6 +445,7 @@ class CacheEngine: f"SELECT status, lyrics, source, confidence FROM cache" f" WHERE {_TRACK_WHERE}" " AND status = ?" + " AND positive_kind = ?" " AND (expires_at IS NULL OR expires_at > ?)" " ORDER BY COALESCE(confidence, ?) DESC," " CASE status WHEN ? THEN 0 ELSE 1 END," @@ -339,6 +453,7 @@ class CacheEngine: _track_where_params(track) + [ status.value, + self._slot_for_status(status), now, LEGACY_CONFIDENCE, CacheStatus.SUCCESS_SYNCED.value, @@ -520,6 +635,11 @@ class CacheEngine: "SELECT source, COUNT(*) FROM cache GROUP BY source" ).fetchall() ) + by_slot = dict( + conn.execute( + "SELECT positive_kind, COUNT(*) FROM cache GROUP BY positive_kind" + ).fetchall() + ) # Source × Status cross-tabulation source_status = conn.execute( "SELECT source, status, COUNT(*) FROM cache GROUP BY source, status" @@ -567,6 +687,7 @@ class CacheEngine: "active": total - expired, "by_status": by_status, "by_source": by_source, + "by_slot": by_slot, "source_status": source_status_table, "confidence_buckets": buckets, } diff --git a/src/lrx_cli/cli.py b/src/lrx_cli/cli.py index 2739f3e..c6263ff 100644 --- a/src/lrx_cli/cli.py +++ b/src/lrx_cli/cli.py @@ -379,6 +379,13 @@ def stats(): print(f"Active : {s['active']}") print(f"Expired : {s['expired']}") + by_slot = s.get("by_slot", {}) + if by_slot: + print( + "Slots : " + + ", ".join(f"{k}={v}" for k, v in sorted(by_slot.items())) + ) + # Source × Status table table = s.get("source_status", {}) if table: @@ -509,6 +516,7 @@ def _print_cache_row(row: dict, indent: str = "") -> None: """Pretty-print a single cache row.""" now = int(time.time()) source = row.get("source", "?") + slot = row.get("positive_kind", "?") status = row.get("status", "?") artist = row.get("artist", "") title = row.get("title", "") @@ -519,7 +527,7 @@ def _print_cache_row(row: dict, indent: str = "") -> None: confidence = row.get("confidence") name = f"{artist} - {title}" if artist and title else row.get("key", "?") - print(f"{indent}[{source}] {name}") + print(f"{indent}[{source}/{slot}] {name}") if album: print(f"{indent} Album : {album}") print(f"{indent} Status : {status}") diff --git a/src/lrx_cli/config.py b/src/lrx_cli/config.py index 13e8048..c135d23 100644 --- a/src/lrx_cli/config.py +++ b/src/lrx_cli/config.py @@ -20,6 +20,9 @@ APP_VERSION = version(APP_NAME) # Paths CACHE_DIR = user_cache_dir(APP_NAME, APP_AUTHOR) DB_PATH = os.path.join(CACHE_DIR, "cache.db") +# Slot identifiers used by per-slot cache rows. +SLOT_SYNCED = "SYNCED" +SLOT_UNSYNCED = "UNSYNCED" # .env loading _config_env = Path(user_config_dir(APP_NAME, APP_AUTHOR)) / ".env" diff --git a/src/lrx_cli/core.py b/src/lrx_cli/core.py index cb7c23b..71b7e2d 100644 --- a/src/lrx_cli/core.py +++ b/src/lrx_cli/core.py @@ -20,9 +20,12 @@ from .config import ( TTL_NOT_FOUND, TTL_NETWORK_ERROR, HIGH_CONFIDENCE, + SLOT_SYNCED, + SLOT_UNSYNCED, ) from .models import TrackMeta, LyricResult, CacheStatus from .enrichers import create_enrichers, enrich_track +from .ranking import is_better_result, select_best_positive # Maps CacheStatus to the default TTL used when storing results @@ -34,44 +37,11 @@ _STATUS_TTL: dict[CacheStatus, Optional[int]] = { } -def _is_better(new: LyricResult, old: LyricResult, allow_unsynced: bool) -> bool: - """Compare two results: higher confidence wins; if equal, synced > unsynced. - If allow_unsynced is False, treat unsynced as strictly worse than any synced.""" - - # If new is negative, it's definitely not better - if new.status not in (CacheStatus.SUCCESS_SYNCED, CacheStatus.SUCCESS_UNSYNCED): - return False - # If old is negative, the result is better or equal regardless of other factors - if old.status not in (CacheStatus.SUCCESS_SYNCED, CacheStatus.SUCCESS_UNSYNCED): - return True - # If unsynced results are not allowed, treat them as strictly worse than any synced result - if not allow_unsynced: - if ( - new.status == CacheStatus.SUCCESS_UNSYNCED - and old.status == CacheStatus.SUCCESS_SYNCED - ): - return False - if ( - old.status == CacheStatus.SUCCESS_UNSYNCED - and new.status == CacheStatus.SUCCESS_SYNCED - ): - return True - # Compare confidence - if new.confidence != old.confidence: - return new.confidence > old.confidence - # Equal confidence — prefer synced as tiebreaker - # Will return false if unsynced results are not allowed - return ( - new.status == CacheStatus.SUCCESS_SYNCED - and old.status != CacheStatus.SUCCESS_SYNCED - ) - - def _pick_for_return( result: FetchResult, allow_unsynced: bool, ) -> Optional[LyricResult]: - """Pick which lyric result should participate in final selection.""" + """Pick best positive slot for final selection under current strategy.""" candidates: list[LyricResult] = [] if result.synced and result.synced.status == CacheStatus.SUCCESS_SYNCED: candidates.append(result.synced) @@ -82,43 +52,41 @@ def _pick_for_return( ): candidates.append(result.unsynced) - if not candidates: - return None - - best = candidates[0] - for c in candidates[1:]: - if _is_better(c, best, allow_unsynced=True): - best = c - return best + return select_best_positive(candidates, allow_unsynced=True) -def _pick_for_cache(result: FetchResult) -> Optional[LyricResult]: - """Pick a single cacheable result from FetchResult for legacy one-slot cache schema.""" - slots = [r for r in (result.synced, result.unsynced) if r is not None] - if not slots: - return None +def _iter_slot_results(result: FetchResult) -> list[tuple[str, LyricResult]]: + """Return all non-None slot results with their cache slot key.""" + out: list[tuple[str, LyricResult]] = [] + if result.synced is not None: + out.append((SLOT_SYNCED, result.synced)) + if result.unsynced is not None: + out.append((SLOT_UNSYNCED, result.unsynced)) + return out - positives = [ - r - for r in slots - if r.status in (CacheStatus.SUCCESS_SYNCED, CacheStatus.SUCCESS_UNSYNCED) - ] - if positives: - best = positives[0] - for p in positives[1:]: - if _is_better(p, best, allow_unsynced=True): - best = p - return best - # If there is no positive result, prefer caching NETWORK_ERROR over NOT_FOUND - # to avoid long false-negative TTL when error signals disagree between slots. - for r in slots: - if r.status == CacheStatus.NETWORK_ERROR: - return r - for r in slots: - if r.status == CacheStatus.NOT_FOUND: - return r - return None +def _pick_cached_for_return( + cached_rows: list[LyricResult], + allow_unsynced: bool, +) -> Optional[LyricResult]: + """Convert cached slot rows into FetchResult-like view and select return candidate.""" + fr = FetchResult() + for row in cached_rows: + if row.status == CacheStatus.SUCCESS_SYNCED: + fr = FetchResult(synced=row, unsynced=fr.unsynced) + elif row.status == CacheStatus.SUCCESS_UNSYNCED: + fr = FetchResult(synced=fr.synced, unsynced=row) + return _pick_for_return(fr, allow_unsynced) + + +def _has_negative_for_both_slots(cached_rows: list[LyricResult]) -> bool: + """True when both slot rows are present and both are negative.""" + if len(cached_rows) < 2: + return False + return all( + r.status in (CacheStatus.NOT_FOUND, CacheStatus.NETWORK_ERROR) + for r in cached_rows + ) class LrcManager: @@ -137,33 +105,38 @@ class LrcManager: bypass_cache: bool, allow_unsynced: bool, ) -> list[tuple[str, LyricResult]]: - """Run one group: cache-check first, then parallel-fetch uncached. Returns (source, result) pairs.""" + """Run one group with slot-aware cache check then parallel fetch uncached sources.""" cached_results: list[tuple[str, LyricResult]] = [] need_fetch: list[BaseFetcher] = [] for fetcher in group: source = fetcher.source_name if not bypass_cache and not fetcher.self_cached: - cached = self.cache.get(track, source) - if cached: - if cached.status in ( - CacheStatus.NOT_FOUND, - CacheStatus.NETWORK_ERROR, - ): + cached_rows = self.cache.get_all(track, source) + if cached_rows: + if _has_negative_for_both_slots(cached_rows): logger.debug( - f"[{source}] cache hit: {cached.status.value}, skipping" + f"[{source}] cache hit: all slots negative, skipping" ) continue - is_trusted = cached.confidence >= HIGH_CONFIDENCE - logger.info( - f"[{source}] cache hit: {cached.status.value}" - f" (confidence={cached.confidence:.0f})" + + cached_for_return = _pick_cached_for_return( + cached_rows, allow_unsynced ) - cached_results.append((source, cached)) - # Return immediately on trusted synced cache hit - if cached.status == CacheStatus.SUCCESS_SYNCED and is_trusted: - return cached_results - continue + if cached_for_return is not None: + is_trusted = cached_for_return.confidence >= HIGH_CONFIDENCE + logger.info( + f"[{source}] cache hit: {cached_for_return.status.value}" + f" (confidence={cached_for_return.confidence:.0f})" + ) + cached_results.append((source, cached_for_return)) + # Return immediately on trusted synced cache hit + if ( + cached_for_return.status == CacheStatus.SUCCESS_SYNCED + and is_trusted + ): + return cached_results + continue elif not fetcher.self_cached: logger.debug(f"[{source}] cache bypassed") need_fetch.append(fetcher) @@ -193,18 +166,20 @@ class LrcManager: logger.debug(f"[{source}] returned None") continue - cache_result = _pick_for_cache(result) return_result = _pick_for_return(result, allow_unsynced) - if ( - cache_result is not None - and not fetcher.self_cached - and not bypass_cache - ): - ttl = cache_result.ttl or _STATUS_TTL.get( - cache_result.status, TTL_NOT_FOUND - ) - self.cache.set(track, source, cache_result, ttl_seconds=ttl) + if not fetcher.self_cached and not bypass_cache: + for slot_kind, slot_result in _iter_slot_results(result): + ttl = slot_result.ttl or _STATUS_TTL.get( + slot_result.status, TTL_NOT_FOUND + ) + self.cache.set( + track, + source, + slot_result, + ttl_seconds=ttl, + positive_kind=slot_kind, + ) if return_result is not None: logger.info( @@ -269,8 +244,10 @@ class LrcManager: ) return result - if best_result is None or _is_better( - result, best_result, allow_unsynced + if best_result is None or is_better_result( + result, + best_result, + allow_unsynced=allow_unsynced, ): best_result = result diff --git a/src/lrx_cli/ranking.py b/src/lrx_cli/ranking.py new file mode 100644 index 0000000..74e5193 --- /dev/null +++ b/src/lrx_cli/ranking.py @@ -0,0 +1,69 @@ +"""Shared ranking rules for LyricResult selection. + +This module centralizes how positive lyric results are compared so cache/core +and other callers use the same precedence and edge-case handling. +""" + +from __future__ import annotations + +from typing import Optional + +from .models import CacheStatus, LyricResult + + +def is_positive_status(status: CacheStatus) -> bool: + return status in (CacheStatus.SUCCESS_SYNCED, CacheStatus.SUCCESS_UNSYNCED) + + +def is_better_result( + new: LyricResult, + old: LyricResult, + *, + allow_unsynced: bool, +) -> bool: + """Return True when *new* should rank above *old*. + + Ordering rules (highest first): + 1) Positive statuses always beat negative statuses. + 2) When allow_unsynced=False, SUCCESS_SYNCED always beats SUCCESS_UNSYNCED. + 3) Higher confidence beats lower confidence. + 4) On equal confidence, SUCCESS_SYNCED beats SUCCESS_UNSYNCED. + """ + new_positive = is_positive_status(new.status) + old_positive = is_positive_status(old.status) + + if not new_positive: + return False + if not old_positive: + return True + + new_synced = new.status == CacheStatus.SUCCESS_SYNCED + old_synced = old.status == CacheStatus.SUCCESS_SYNCED + + if not allow_unsynced and new_synced != old_synced: + return new_synced + + if new.confidence != old.confidence: + return new.confidence > old.confidence + + return new_synced and not old_synced + + +def select_best_positive( + candidates: list[LyricResult], + *, + allow_unsynced: bool, +) -> Optional[LyricResult]: + """Pick best positive LyricResult from candidates. + + Negative statuses are ignored. + """ + positives = [c for c in candidates if is_positive_status(c.status)] + if not positives: + return None + + best = positives[0] + for c in positives[1:]: + if is_better_result(c, best, allow_unsynced=allow_unsynced): + best = c + return best diff --git a/tests/test_cache.py b/tests/test_cache.py index ffc39c3..835c972 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -7,6 +7,8 @@ import pytest from lrx_cli.cache import ( CacheEngine, + SLOT_SYNCED, + SLOT_UNSYNCED, _generate_key, ) from lrx_cli.config import DURATION_TOLERANCE_MS @@ -67,10 +69,10 @@ def test_generate_key_raises_when_metadata_missing() -> None: def test_migrate_adds_confidence_version_and_boosts_unsynced(tmp_path: Path) -> None: - """Legacy cache without confidence_version is migrated in-place. + """Legacy single-row cache is migrated to slot rows. Expected behavior: - - add confidence_version column + - add positive_kind and confidence_version - boost SUCCESS_UNSYNCED confidence by +10 with cap at 100 - keep SUCCESS_SYNCED confidence unchanged """ @@ -111,16 +113,107 @@ def test_migrate_adds_confidence_version_and_boosts_unsynced(tmp_path: Path) -> with sqlite3.connect(db_path) as conn: cols = {r[1] for r in conn.execute("PRAGMA table_info(cache)").fetchall()} rows = conn.execute( - "SELECT key, status, confidence, confidence_version FROM cache ORDER BY key" + "SELECT key, positive_kind, status, confidence, confidence_version FROM cache ORDER BY key, positive_kind" ).fetchall() + assert "positive_kind" in cols assert "confidence_version" in cols by_key = { - k: (status, confidence, version) for k, status, confidence, version in rows + (k, slot): (status, confidence, version) + for k, slot, status, confidence, version in rows } - assert by_key["u1"] == ("SUCCESS_UNSYNCED", 95.0, 1) - assert by_key["u2"] == ("SUCCESS_UNSYNCED", 100.0, 1) - assert by_key["s1"] == ("SUCCESS_SYNCED", 70.0, 1) + assert by_key[("u1", SLOT_UNSYNCED)] == ("SUCCESS_UNSYNCED", 95.0, 1) + assert by_key[("u2", SLOT_UNSYNCED)] == ("SUCCESS_UNSYNCED", 100.0, 1) + assert by_key[("s1", SLOT_SYNCED)] == ("SUCCESS_SYNCED", 70.0, 1) + + +def test_migrate_negative_row_splits_into_two_slot_rows(tmp_path: Path) -> None: + db_path = tmp_path / "legacy-negative.db" + + with sqlite3.connect(db_path) as conn: + conn.execute( + """ + CREATE TABLE cache ( + key TEXT PRIMARY KEY, + source TEXT NOT NULL, + status TEXT NOT NULL, + lyrics TEXT, + created_at INTEGER NOT NULL, + expires_at INTEGER, + artist TEXT, + title TEXT, + album TEXT, + length INTEGER, + confidence REAL + ) + """ + ) + conn.execute( + """ + INSERT INTO cache + (key, source, status, lyrics, created_at, expires_at, artist, title, album, length, confidence) + VALUES + ('n1', 's1', 'NOT_FOUND', NULL, 1, NULL, 'A', 'T', 'AL', 180000, 0.0) + """ + ) + conn.commit() + + CacheEngine(str(db_path)) + + with sqlite3.connect(db_path) as conn: + rows = conn.execute( + "SELECT key, positive_kind, status FROM cache ORDER BY positive_kind" + ).fetchall() + + assert rows == [ + ("n1", SLOT_SYNCED, "NOT_FOUND"), + ("n1", SLOT_UNSYNCED, "NOT_FOUND"), + ] + + +def test_migrate_normalizes_old_slot_spelling(tmp_path: Path) -> None: + db_path = tmp_path / "slot-spelling.db" + + with sqlite3.connect(db_path) as conn: + conn.execute( + """ + CREATE TABLE cache ( + key TEXT NOT NULL, + positive_kind TEXT NOT NULL, + source TEXT NOT NULL, + status TEXT NOT NULL, + lyrics TEXT, + created_at INTEGER NOT NULL, + expires_at INTEGER, + artist TEXT, + title TEXT, + album TEXT, + length INTEGER, + confidence REAL, + confidence_version INTEGER, + PRIMARY KEY (key, positive_kind) + ) + """ + ) + conn.execute( + """ + INSERT INTO cache + (key, positive_kind, source, status, lyrics, created_at, expires_at, artist, title, album, length, confidence, confidence_version) + VALUES + ('k1', 'SYNCHED', 's1', 'SUCCESS_SYNCED', 'l1', 1, NULL, 'A', 'T', 'AL', 180000, 80.0, 1), + ('k1', 'UNSYNCHED', 's1', 'SUCCESS_UNSYNCED', 'l2', 1, NULL, 'A', 'T', 'AL', 180000, 70.0, 1) + """ + ) + conn.commit() + + CacheEngine(str(db_path)) + + with sqlite3.connect(db_path) as conn: + rows = conn.execute( + "SELECT positive_kind FROM cache ORDER BY positive_kind" + ).fetchall() + + assert rows == [(SLOT_SYNCED,), (SLOT_UNSYNCED,)] def test_set_and_get_roundtrip_with_ttl( @@ -136,9 +229,10 @@ def test_set_and_get_roundtrip_with_ttl( ttl_seconds=120, ) - cached = cache_db.get(track, "lrclib") + cached_rows = cache_db.get_all(track, "lrclib") - assert cached is not None + assert len(cached_rows) == 1 + cached = cached_rows[0] assert cached.status is CacheStatus.SUCCESS_SYNCED assert str(cached.lyrics) == "[00:01.00]line" assert cached.source == "lrclib" @@ -158,12 +252,29 @@ def test_get_expired_entry_returns_none_and_removes_row( ) monkeypatch.setattr("lrx_cli.cache.time.time", lambda: 2_000_020) - cached = cache_db.get(track, "netease") + cached_rows = cache_db.get_all(track, "netease") - assert cached is None + assert cached_rows == [] assert cache_db.query_all() == [] +def test_set_negative_without_slot_writes_both_slots(cache_db: CacheEngine) -> None: + track = _track() + cache_db.set( + track, "src", _result(CacheStatus.NOT_FOUND, None, "src"), ttl_seconds=60 + ) + + with sqlite3.connect(cache_db.db_path) as conn: + rows = conn.execute( + "SELECT positive_kind, status FROM cache ORDER BY positive_kind" + ).fetchall() + + assert rows == [ + (SLOT_SYNCED, CacheStatus.NOT_FOUND.value), + (SLOT_UNSYNCED, CacheStatus.NOT_FOUND.value), + ] + + def test_get_backfills_missing_length_when_track_provides_it( cache_db: CacheEngine, ) -> None: @@ -187,9 +298,9 @@ def test_get_backfills_missing_length_when_track_provides_it( album=None, length=200000, ) - cached = cache_db.get(track_with_length, "spotify") + cached_rows = cache_db.get_all(track_with_length, "spotify") - assert cached is not None + assert cached_rows with sqlite3.connect(cache_db.db_path) as conn: row = conn.execute("SELECT length FROM cache LIMIT 1").fetchone() @@ -268,22 +379,6 @@ def test_prune_removes_only_expired_rows( assert rows[0]["source"] == "s-active" -def test_find_best_positive_uses_exact_match_and_prefers_synced( - cache_db: CacheEngine, -) -> None: - track = _track(artist="Artist", title="Song", album="Album") - cache_db.set(track, "s1", _result(CacheStatus.SUCCESS_UNSYNCED, "u", "s1")) - cache_db.set(track, "s2", _result(CacheStatus.SUCCESS_SYNCED, "s", "s2")) - - best = cache_db.find_best_positive(track, CacheStatus.SUCCESS_SYNCED) - - assert best is not None - assert best.status is CacheStatus.SUCCESS_SYNCED - assert str(best.lyrics) == "s" - # find_best_positive always reports cache-search source - assert best.source == "cache-search" - - def test_find_best_positive_returns_status_specific_results( cache_db: CacheEngine, ) -> None: @@ -297,6 +392,7 @@ def test_find_best_positive_returns_status_specific_results( assert best_synced is not None assert best_synced.status is CacheStatus.SUCCESS_SYNCED assert str(best_synced.lyrics) == "s" + assert best_synced.source == "cache-search" best_unsynced = cache_db.find_best_positive(track, CacheStatus.SUCCESS_UNSYNCED) assert best_unsynced is not None @@ -395,6 +491,34 @@ def test_update_confidence_targets_specific_source(cache_db: CacheEngine) -> Non assert rows["s2"]["confidence"] == 100.0 # unchanged default +def test_update_confidence_updates_both_slots_for_same_source( + cache_db: CacheEngine, +) -> None: + track = _track(artist="A", title="T", album="AL") + cache_db.set( + track, + "src", + _result(CacheStatus.SUCCESS_SYNCED, "sync", "src"), + positive_kind=SLOT_SYNCED, + ) + cache_db.set( + track, + "src", + _result(CacheStatus.SUCCESS_UNSYNCED, "unsync", "src"), + positive_kind=SLOT_UNSYNCED, + ) + + updated = cache_db.update_confidence(track, 66.0, "src") + assert updated == 2 + + with sqlite3.connect(cache_db.db_path) as conn: + rows = conn.execute( + "SELECT positive_kind, confidence FROM cache WHERE source = 'src' ORDER BY positive_kind" + ).fetchall() + + assert rows == [(SLOT_SYNCED, 66.0), (SLOT_UNSYNCED, 66.0)] + + def test_update_confidence_returns_zero_for_missing_source( cache_db: CacheEngine, ) -> None: @@ -476,3 +600,5 @@ def test_query_track_and_stats_return_expected_aggregates( assert stats["expired"] == 0 assert stats["by_status"][CacheStatus.SUCCESS_SYNCED.value] == 1 assert stats["by_status"][CacheStatus.SUCCESS_UNSYNCED.value] == 1 + assert stats["by_slot"][SLOT_SYNCED] == 1 + assert stats["by_slot"][SLOT_UNSYNCED] == 1 diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index b4a5e49..e00e465 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -2,9 +2,9 @@ from __future__ import annotations import asyncio from unittest.mock import patch -import pytest from lrx_cli.config import HIGH_CONFIDENCE +from lrx_cli.cache import SLOT_UNSYNCED from lrx_cli.core import LrcManager from lrx_cli.fetchers.base import BaseFetcher, FetchResult from lrx_cli.lrc import LRCData @@ -137,7 +137,7 @@ def test_trusted_synced_cancels_sibling(tmp_path): assert result.source == "fast" -def test_best_confidence_within_group(tmp_path): +def test_allow_unsynced_true_picks_highest_confidence_unsynced(tmp_path): """When allow_unsynced=True and no trusted synced result, highest-confidence unsynced is returned.""" low = MockFetcher("low", _fr(unsynced=_unsynced("low", confidence=40.0))) high = MockFetcher("high", _fr(unsynced=_unsynced("high", confidence=70.0))) @@ -148,6 +148,22 @@ def test_best_confidence_within_group(tmp_path): assert result.source == "high" +def test_equal_confidence_prefers_synced_when_unsynced_allowed(tmp_path): + """Tie on confidence should still prefer synced over unsynced.""" + dual = MockFetcher( + "dual", + _fr( + synced=_synced("dual", confidence=70.0), + unsynced=_unsynced("dual", confidence=70.0), + ), + ) + manager = make_manager(tmp_path) + with patch("lrx_cli.core.build_plan", return_value=[[dual]]): + result = manager.fetch_for_track(_track(), allow_unsynced=True) + assert result is not None + assert result.status == CacheStatus.SUCCESS_SYNCED + + def test_unsynced_only_returns_none_when_not_allowed(tmp_path): """When allow_unsynced=False, unsynced-only pipeline result must be rejected.""" only_unsynced = MockFetcher( @@ -210,22 +226,10 @@ def test_cache_trusted_synced_no_fetch(tmp_path): assert result.status == CacheStatus.SUCCESS_SYNCED -@pytest.mark.xfail( - strict=True, - reason=( - "Known limitation: cache stores only one positive slot; after an allow_unsynced=True " - "request caches unsynced, later allow_unsynced=False request does not re-fetch synced" - ), -) -def test_xfail_cached_unsynced_should_not_block_live_synced_when_unsynced_disallowed( +def test_cached_slots_support_strategy_switch_without_refetch( tmp_path, ): - """Known gap reproduced with strategy switch across two requests. - - 1) Fetcher returns both synced and unsynced. - 2) allow_unsynced=True picks/caches higher-confidence unsynced. - 3) allow_unsynced=False should re-fetch synced, but currently short-circuits on cache. - """ + """When both slots are cached, strategy switch should reuse cache without re-fetch.""" fetcher = MockFetcher( "src", _fr( @@ -244,10 +248,32 @@ def test_xfail_cached_unsynced_should_not_block_live_synced_when_unsynced_disall fetcher.called = False - # Second request: stricter strategy should recover synced via re-fetch. + # Second request: stricter strategy should use synced cache slot directly. with patch("lrx_cli.core.build_plan", return_value=[[fetcher]]): second = manager.fetch_for_track(track, allow_unsynced=False) - assert fetcher.called + assert not fetcher.called assert second is not None assert second.status == CacheStatus.SUCCESS_SYNCED + + +def test_unsynced_cache_only_still_fetches_when_unsynced_disallowed(tmp_path): + """If only unsynced cache slot exists, allow_unsynced=False must still fetch synced.""" + fetcher = MockFetcher("src", _fr(synced=_synced("src", confidence=88.0))) + manager = make_manager(tmp_path) + track = _track() + + manager.cache.set( + track, + "src", + _unsynced("src", confidence=95.0), + ttl_seconds=3600, + positive_kind=SLOT_UNSYNCED, + ) + + with patch("lrx_cli.core.build_plan", return_value=[[fetcher]]): + result = manager.fetch_for_track(track, allow_unsynced=False) + + assert fetcher.called + assert result is not None + assert result.status == CacheStatus.SUCCESS_SYNCED diff --git a/uv.lock b/uv.lock index ac139a1..85d0af2 100644 --- a/uv.lock +++ b/uv.lock @@ -153,7 +153,7 @@ wheels = [ [[package]] name = "lrx-cli" -version = "0.6.0" +version = "0.6.1" source = { editable = "." } dependencies = [ { name = "cyclopts" },