refactor: large scale refactor regarding selection & fetchers

This commit is contained in:
2026-04-06 13:37:51 +02:00
parent 69b7f5c60c
commit 0c85af534e
23 changed files with 794 additions and 364 deletions
+4
View File
@@ -9,3 +9,7 @@ wheels/
.* .*
!.gitignore !.gitignore
!.python-version !.python-version
TODO.md
PENDING.md
SOLVED.md
+1 -1
View File
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
[project] [project]
name = "lrx-cli" name = "lrx-cli"
version = "0.5.6" version = "0.6.0"
description = "Fetch line-synced lyrics for your music player." description = "Fetch line-synced lyrics for your music player."
readme = "README.md" readme = "README.md"
requires-python = ">=3.13" requires-python = ">=3.13"
+51 -32
View File
@@ -1,7 +1,8 @@
""" """
Author: Uyanide pywang0608@foxmail.com Author: Uyanide pywang0608@foxmail.com
Date: 2026-03-25 10:18:03 Date: 2026-03-25 10:18:03
Description: SQLite-based lyric cache with per-source storage and TTL expiration. Description: SQLite-based lyric cache with per-source storage, TTL expiration,
and lightweight schema migrations (including confidence versioning).
""" """
import json import json
@@ -13,11 +14,7 @@ from loguru import logger
from .lrc import LRCData from .lrc import LRCData
from .normalize import normalize_for_match as _normalize_for_match from .normalize import normalize_for_match as _normalize_for_match
from .config import ( from .config import DURATION_TOLERANCE_MS, LEGACY_CONFIDENCE, CONFIDENCE_ALGO_VERSION
DURATION_TOLERANCE_MS,
LEGACY_CONFIDENCE_SYNCED,
LEGACY_CONFIDENCE_UNSYNCED,
)
from .models import TrackMeta, LyricResult, CacheStatus from .models import TrackMeta, LyricResult, CacheStatus
@@ -79,7 +76,14 @@ class CacheEngine:
self._init_db() self._init_db()
def _init_db(self) -> None: def _init_db(self) -> None:
"""Create or migrate the cache and credentials tables.""" """Create or migrate cache schema and credentials table.
Migration notes:
- Add structural columns introduced after initial releases.
- When introducing confidence versioning, rebalance legacy unsynced
confidence (+10, capped at 100) and stamp migrated rows with the
current algorithm version.
"""
with sqlite3.connect(self.db_path) as conn: with sqlite3.connect(self.db_path) as conn:
conn.execute(""" conn.execute("""
CREATE TABLE IF NOT EXISTS cache ( CREATE TABLE IF NOT EXISTS cache (
@@ -91,7 +95,10 @@ class CacheEngine:
expires_at INTEGER, expires_at INTEGER,
artist TEXT, artist TEXT,
title TEXT, title TEXT,
album TEXT album TEXT,
length INTEGER,
confidence REAL,
confidence_version INTEGER
) )
""") """)
conn.execute(""" conn.execute("""
@@ -101,12 +108,28 @@ class CacheEngine:
expires_at INTEGER expires_at INTEGER
) )
""") """)
# Migrations # Incremental, idempotent migrations for existing databases.
cols = {r[1] for r in conn.execute("PRAGMA table_info(cache)").fetchall()} cols = {r[1] for r in conn.execute("PRAGMA table_info(cache)").fetchall()}
if "length" not in cols: if "length" not in cols:
conn.execute("ALTER TABLE cache ADD COLUMN length INTEGER") conn.execute("ALTER TABLE cache ADD COLUMN length INTEGER")
if "confidence" not in cols: if "confidence" not in cols:
conn.execute("ALTER TABLE cache ADD COLUMN confidence REAL") conn.execute("ALTER TABLE cache ADD COLUMN confidence REAL")
if "confidence_version" not in cols:
conn.execute("ALTER TABLE cache ADD COLUMN confidence_version INTEGER")
# First-time confidence-version migration: boost unsynced rows
# from older scoring assumptions while preserving upper bound.
conn.execute(
"""
UPDATE cache
SET confidence = MIN(100.0, COALESCE(confidence, ?) + 10.0)
WHERE status = ?
""",
(LEGACY_CONFIDENCE, CacheStatus.SUCCESS_UNSYNCED.value),
)
conn.execute(
"UPDATE cache SET confidence_version = ? WHERE confidence_version IS NULL",
(CONFIDENCE_ALGO_VERSION,),
)
conn.commit() conn.commit()
# Read # Read
@@ -155,10 +178,8 @@ class CacheEngine:
) )
status = CacheStatus(status_str) status = CacheStatus(status_str)
if confidence is None: if confidence is None:
if status == CacheStatus.SUCCESS_SYNCED: if status in (CacheStatus.SUCCESS_SYNCED, CacheStatus.SUCCESS_UNSYNCED):
confidence = LEGACY_CONFIDENCE_SYNCED confidence = LEGACY_CONFIDENCE
elif status == CacheStatus.SUCCESS_UNSYNCED:
confidence = LEGACY_CONFIDENCE_UNSYNCED
else: else:
confidence = 0.0 # negative statuses: no confidence confidence = 0.0 # negative statuses: no confidence
@@ -207,7 +228,11 @@ class CacheEngine:
result: LyricResult, result: LyricResult,
ttl_seconds: Optional[int] = None, ttl_seconds: Optional[int] = None,
) -> None: ) -> None:
"""Store a lyric result in the cache.""" """Store a lyric result in the cache.
New/updated rows are tagged with the current confidence algorithm
version so future migrations can be applied deterministically.
"""
try: try:
key = _generate_key(track, source) key = _generate_key(track, source)
except ValueError: except ValueError:
@@ -221,8 +246,8 @@ class CacheEngine:
conn.execute( conn.execute(
"""INSERT OR REPLACE INTO cache """INSERT OR REPLACE INTO cache
(key, source, status, lyrics, created_at, expires_at, (key, source, status, lyrics, created_at, expires_at,
artist, title, album, length, confidence) artist, title, album, length, confidence, confidence_version)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
( (
key, key,
source, source,
@@ -235,6 +260,7 @@ class CacheEngine:
track.album, track.album,
track.length, track.length,
result.confidence, result.confidence,
CONFIDENCE_ALGO_VERSION,
), ),
) )
conn.commit() conn.commit()
@@ -288,7 +314,9 @@ class CacheEngine:
# Exact cross-source search # Exact cross-source search
def find_best_positive(self, track: TrackMeta) -> Optional[LyricResult]: def find_best_positive(
self, track: TrackMeta, status: CacheStatus
) -> Optional[LyricResult]:
"""Find the best positive (synced/unsynced) cache entry for *track*. """Find the best positive (synced/unsynced) cache entry for *track*.
Uses exact metadata match (artist + title + album) across all sources. Uses exact metadata match (artist + title + album) across all sources.
@@ -303,21 +331,16 @@ class CacheEngine:
rows = conn.execute( rows = conn.execute(
f"SELECT status, lyrics, source, confidence FROM cache" f"SELECT status, lyrics, source, confidence FROM cache"
f" WHERE {_TRACK_WHERE}" f" WHERE {_TRACK_WHERE}"
" AND status IN (?, ?)" " AND status = ?"
" AND (expires_at IS NULL OR expires_at > ?)" " AND (expires_at IS NULL OR expires_at > ?)"
" ORDER BY COALESCE(confidence," " ORDER BY COALESCE(confidence, ?) DESC,"
" CASE status WHEN ? THEN ? ELSE ? END"
" ) DESC,"
" CASE status WHEN ? THEN 0 ELSE 1 END," " CASE status WHEN ? THEN 0 ELSE 1 END,"
" created_at DESC LIMIT 1", " created_at DESC",
_track_where_params(track) _track_where_params(track)
+ [ + [
CacheStatus.SUCCESS_SYNCED.value, status.value,
CacheStatus.SUCCESS_UNSYNCED.value,
now, now,
CacheStatus.SUCCESS_SYNCED.value, LEGACY_CONFIDENCE,
LEGACY_CONFIDENCE_SYNCED,
LEGACY_CONFIDENCE_UNSYNCED,
CacheStatus.SUCCESS_SYNCED.value, CacheStatus.SUCCESS_SYNCED.value,
], ],
).fetchall() ).fetchall()
@@ -328,11 +351,7 @@ class CacheEngine:
row = dict(rows[0]) row = dict(rows[0])
confidence = row["confidence"] confidence = row["confidence"]
if confidence is None: if confidence is None:
confidence = ( confidence = LEGACY_CONFIDENCE
LEGACY_CONFIDENCE_SYNCED
if row["status"] == CacheStatus.SUCCESS_SYNCED.value
else LEGACY_CONFIDENCE_UNSYNCED
)
return LyricResult( return LyricResult(
status=CacheStatus(row["status"]), status=CacheStatus(row["status"]),
lyrics=LRCData(row["lyrics"]) if row["lyrics"] else None, lyrics=LRCData(row["lyrics"]) if row["lyrics"] else None,
+35 -16
View File
@@ -14,7 +14,7 @@ import cyclopts
from loguru import logger from loguru import logger
from .config import DB_PATH, enable_debug from .config import DB_PATH, enable_debug
from .models import TrackMeta, CacheStatus from .models import TrackMeta
from .mpris import get_current_track from .mpris import get_current_track
from .core import LrcManager from .core import LrcManager
from .fetchers import FetcherMethodType from .fetchers import FetcherMethodType
@@ -88,10 +88,12 @@ def fetch(
name="--no-cache", negative="", help="Bypass the cache for this request." name="--no-cache", negative="", help="Bypass the cache for this request."
), ),
] = False, ] = False,
only_synced: Annotated[ allow_unsynced: Annotated[
bool, bool,
cyclopts.Parameter( cyclopts.Parameter(
name="--only-synced", negative="", help="Only accept synced (timed) lyrics." name="--allow-unsynced",
negative="",
help="Allow unsynced lyrics (will be displayed with all time tags set to [00:00.00]).",
), ),
] = False, ] = False,
plain: Annotated[ plain: Annotated[
@@ -110,16 +112,17 @@ def fetch(
logger.info(f"Track: {track.display_name()}") logger.info(f"Track: {track.display_name()}")
result = manager.fetch_for_track(track, force_method=method, bypass_cache=no_cache) result = manager.fetch_for_track(
track,
force_method=method,
bypass_cache=no_cache,
allow_unsynced=allow_unsynced,
)
if not result or not result.lyrics: if not result or not result.lyrics:
logger.error("No lyrics found.") logger.error("No lyrics found.")
sys.exit(1) sys.exit(1)
if only_synced and result.status != CacheStatus.SUCCESS_SYNCED:
logger.error("Only unsynced lyrics available (--only-synced requested).")
sys.exit(1)
print(result.lyrics.to_lrc(plain=plain)) print(result.lyrics.to_lrc(plain=plain))
@@ -165,10 +168,12 @@ def search(
name="--no-cache", negative="", help="Bypass the cache for this request." name="--no-cache", negative="", help="Bypass the cache for this request."
), ),
] = False, ] = False,
only_synced: Annotated[ allow_unsynced: Annotated[
bool, bool,
cyclopts.Parameter( cyclopts.Parameter(
name="--only-synced", negative="", help="Only accept synced (timed) lyrics." name="--allow-unsynced",
negative="",
help="Allow unsynced lyrics (will be displayed with all time tags set to [00:00.00]).",
), ),
] = False, ] = False,
plain: Annotated[ plain: Annotated[
@@ -198,16 +203,17 @@ def search(
logger.info(f"Track: {track.display_name()}") logger.info(f"Track: {track.display_name()}")
result = manager.fetch_for_track(track, force_method=method, bypass_cache=no_cache) result = manager.fetch_for_track(
track,
force_method=method,
bypass_cache=no_cache,
allow_unsynced=allow_unsynced,
)
if not result or not result.lyrics: if not result or not result.lyrics:
logger.error("No lyrics found.") logger.error("No lyrics found.")
sys.exit(1) sys.exit(1)
if only_synced and result.status != CacheStatus.SUCCESS_SYNCED:
logger.error("Only unsynced lyrics available (--only-synced requested).")
sys.exit(1)
print(result.lyrics.to_lrc(plain=plain)) print(result.lyrics.to_lrc(plain=plain))
@@ -236,6 +242,14 @@ def export(
name=["--overwrite", "-f"], negative="", help="Overwrite existing file." name=["--overwrite", "-f"], negative="", help="Overwrite existing file."
), ),
] = False, ] = False,
allow_unsynced: Annotated[
bool,
cyclopts.Parameter(
name="--allow-unsynced",
negative="",
help="Allow unsynced lyrics (will be exported with all time tags set to [00:00.00] if --plain is not present).",
),
] = False,
plain: Annotated[ plain: Annotated[
bool, bool,
cyclopts.Parameter( cyclopts.Parameter(
@@ -249,7 +263,12 @@ def export(
logger.error("No active playing track found.") logger.error("No active playing track found.")
sys.exit(1) sys.exit(1)
result = manager.fetch_for_track(track, force_method=method, bypass_cache=no_cache) result = manager.fetch_for_track(
track,
force_method=method,
bypass_cache=no_cache,
allow_unsynced=allow_unsynced,
)
if not result or not result.lyrics: if not result or not result.lyrics:
logger.error("No lyrics available to export.") logger.error("No lyrics available to export.")
sys.exit(1) sys.exit(1)
+3 -3
View File
@@ -44,9 +44,10 @@ SCORE_W_ARTIST = 30.0
SCORE_W_ALBUM = 10.0 SCORE_W_ALBUM = 10.0
SCORE_W_DURATION = 10.0 SCORE_W_DURATION = 10.0
SCORE_W_SYNCED = 10.0 SCORE_W_SYNCED = 10.0
CONFIDENCE_ALGO_VERSION = 1
# Confidence thresholds # Confidence thresholds
MIN_CONFIDENCE = 25.0 # below this, candidate is rejected MIN_CONFIDENCE = 40.0 # below this, candidate is rejected
HIGH_CONFIDENCE = 80.0 # at or above this, stop searching early HIGH_CONFIDENCE = 80.0 # at or above this, stop searching early
# Multi-candidate fetching # Multi-candidate fetching
@@ -54,8 +55,7 @@ MULTI_CANDIDATE_LIMIT = 3 # max candidates to try per search-based fetcher
MULTI_CANDIDATE_DELAY_S = 0.2 # delay between sequential lyric fetches MULTI_CANDIDATE_DELAY_S = 0.2 # delay between sequential lyric fetches
# Legacy cache rows (no confidence stored) get one base score, regardless of sync status
LEGACY_CONFIDENCE_SYNCED = 50.0 LEGACY_CONFIDENCE = 50.0
LEGACY_CONFIDENCE_UNSYNCED = 40.0
# User-Agents # User-Agents
UA_BROWSER = "Mozilla/5.0 (X11; Linux x86_64; rv:149.0) Gecko/20100101 Firefox/149.0" UA_BROWSER = "Mozilla/5.0 (X11; Linux x86_64; rv:149.0) Gecko/20100101 Firefox/149.0"
+129 -36
View File
@@ -10,7 +10,7 @@ from typing import Optional
from loguru import logger from loguru import logger
from .fetchers import FetcherMethodType, build_plan, create_fetchers from .fetchers import FetcherMethodType, build_plan, create_fetchers
from .fetchers.base import BaseFetcher from .fetchers.base import BaseFetcher, FetchResult
from .authenticators import create_authenticators from .authenticators import create_authenticators
from .cache import CacheEngine from .cache import CacheEngine
from .lrc import LRCData from .lrc import LRCData
@@ -34,28 +34,91 @@ _STATUS_TTL: dict[CacheStatus, Optional[int]] = {
} }
def _is_better(new: LyricResult, old: LyricResult, allow_unsynced: bool) -> bool:
    """Decide whether *new* should replace *old* as the preferred result.

    Rules, applied in order:
      1. A non-positive *new* never wins.
      2. A positive *new* always beats a non-positive *old*.
      3. When ``allow_unsynced`` is False and the two differ in sync kind,
         the synced one wins regardless of confidence.
      4. Otherwise the higher confidence wins.
      5. On equal confidence, synced beats unsynced; a full tie keeps *old*.
    """
    positive = (CacheStatus.SUCCESS_SYNCED, CacheStatus.SUCCESS_UNSYNCED)

    if new.status not in positive:
        return False
    if old.status not in positive:
        return True

    # Both sides are positive from here on, so each is either synced or unsynced.
    new_synced = new.status == CacheStatus.SUCCESS_SYNCED
    old_synced = old.status == CacheStatus.SUCCESS_SYNCED

    # Strict mode: sync kind dominates confidence whenever the kinds differ.
    if not allow_unsynced and new_synced != old_synced:
        return new_synced

    if new.confidence == old.confidence:
        # Tie on confidence: only a synced-over-unsynced upgrade counts as better.
        return new_synced and not old_synced
    return new.confidence > old.confidence
def _pick_for_return(
    result: FetchResult,
    allow_unsynced: bool,
) -> Optional[LyricResult]:
    """Choose the slot of *result* that may compete in final selection.

    The synced slot is eligible only when its status is SUCCESS_SYNCED. The
    unsynced slot is eligible only when ``allow_unsynced`` is set and its
    status is SUCCESS_UNSYNCED. Returns None when neither slot is eligible.
    """
    synced_slot = result.synced
    unsynced_slot = result.unsynced

    eligible: list[LyricResult] = []
    if synced_slot and synced_slot.status == CacheStatus.SUCCESS_SYNCED:
        eligible.append(synced_slot)
    if (
        allow_unsynced
        and unsynced_slot
        and unsynced_slot.status == CacheStatus.SUCCESS_UNSYNCED
    ):
        eligible.append(unsynced_slot)

    winner: Optional[LyricResult] = None
    for contender in eligible:
        # Unsynced entries were already filtered above, so compare on equal terms.
        if winner is None or _is_better(contender, winner, allow_unsynced=True):
            winner = contender
    return winner
def _pick_for_cache(result: FetchResult) -> Optional[LyricResult]:
    """Reduce a two-slot FetchResult to the single entry worth caching.

    A positive slot (synced or unsynced) is preferred, with the better one
    chosen by ``_is_better``. If there is none, NETWORK_ERROR is picked ahead
    of NOT_FOUND. Returns None when both slots are empty or hold another status.
    """
    filled = [slot for slot in (result.synced, result.unsynced) if slot is not None]

    success = (CacheStatus.SUCCESS_SYNCED, CacheStatus.SUCCESS_UNSYNCED)
    chosen: Optional[LyricResult] = None
    for slot in filled:
        if slot.status not in success:
            continue
        if chosen is None or _is_better(slot, chosen, allow_unsynced=True):
            chosen = slot
    if chosen is not None:
        return chosen

    # No positive result: prefer NETWORK_ERROR over NOT_FOUND to avoid a long
    # false-negative TTL when the two slots disagree.
    for wanted in (CacheStatus.NETWORK_ERROR, CacheStatus.NOT_FOUND):
        for slot in filled:
            if slot.status == wanted:
                return slot
    return None
class LrcManager: class LrcManager:
@@ -72,6 +135,7 @@ class LrcManager:
group: list[BaseFetcher], group: list[BaseFetcher],
track: TrackMeta, track: TrackMeta,
bypass_cache: bool, bypass_cache: bool,
allow_unsynced: bool,
) -> list[tuple[str, LyricResult]]: ) -> list[tuple[str, LyricResult]]:
"""Run one group: cache-check first, then parallel-fetch uncached. Returns (source, result) pairs.""" """Run one group: cache-check first, then parallel-fetch uncached. Returns (source, result) pairs."""
cached_results: list[tuple[str, LyricResult]] = [] cached_results: list[tuple[str, LyricResult]] = []
@@ -129,25 +193,30 @@ class LrcManager:
logger.debug(f"[{source}] returned None") logger.debug(f"[{source}] returned None")
continue continue
if not fetcher.self_cached and not bypass_cache: cache_result = _pick_for_cache(result)
ttl = result.ttl or _STATUS_TTL.get( return_result = _pick_for_return(result, allow_unsynced)
result.status, TTL_NOT_FOUND
)
self.cache.set(track, source, result, ttl_seconds=ttl)
if result.status in (
CacheStatus.SUCCESS_SYNCED,
CacheStatus.SUCCESS_UNSYNCED,
):
logger.info(
f"[{source}] got {result.status.value} lyrics"
f" (confidence={result.confidence:.0f})"
)
cached_results.append((source, result))
if ( if (
result.status == CacheStatus.SUCCESS_SYNCED cache_result is not None
and result.confidence >= HIGH_CONFIDENCE and not fetcher.self_cached
and not bypass_cache
):
ttl = cache_result.ttl or _STATUS_TTL.get(
cache_result.status, TTL_NOT_FOUND
)
self.cache.set(track, source, cache_result, ttl_seconds=ttl)
if return_result is not None:
logger.info(
f"[{source}] got {return_result.status.value} lyrics"
f" (confidence={return_result.confidence:.0f})"
)
cached_results.append((source, return_result))
if (
return_result is not None
and return_result.status == CacheStatus.SUCCESS_SYNCED
and return_result.confidence >= HIGH_CONFIDENCE
): ):
found_trusted = True found_trusted = True
@@ -164,6 +233,7 @@ class LrcManager:
track: TrackMeta, track: TrackMeta,
force_method: Optional[FetcherMethodType], force_method: Optional[FetcherMethodType],
bypass_cache: bool, bypass_cache: bool,
allow_unsynced: bool,
) -> Optional[LyricResult]: ) -> Optional[LyricResult]:
track = await enrich_track(track, self.enrichers) track = await enrich_track(track, self.enrichers)
logger.info(f"Fetching lyrics for: {track.display_name()}") logger.info(f"Fetching lyrics for: {track.display_name()}")
@@ -175,7 +245,12 @@ class LrcManager:
best_result: Optional[LyricResult] = None best_result: Optional[LyricResult] = None
for group in plan: for group in plan:
group_results = await self._run_group(group, track, bypass_cache) group_results = await self._run_group(
group,
track,
bypass_cache,
allow_unsynced,
)
for source, result in group_results: for source, result in group_results:
if result.status not in ( if result.status not in (
@@ -192,16 +267,26 @@ class LrcManager:
f"Returning {result.status.value} lyrics from {source}" f"Returning {result.status.value} lyrics from {source}"
f" (confidence={result.confidence:.0f})" f" (confidence={result.confidence:.0f})"
) )
return _normalize_result(result) return result
if best_result is None or _is_better(result, best_result): if best_result is None or _is_better(
result, best_result, allow_unsynced
):
best_result = result best_result = result
if best_result: if best_result:
if (
best_result.status == CacheStatus.SUCCESS_UNSYNCED
and not allow_unsynced
):
logger.info(
f"Unsynced lyrics found from {best_result.source}, but unsynced results are not allowed"
)
return None
logger.info( logger.info(
f"Returning {best_result.status.value} lyrics from {best_result.source}" f"Returning {best_result.status.value} lyrics from {best_result.source}"
) )
return _normalize_result(best_result) return best_result
logger.info(f"No lyrics found for {track.display_name()}") logger.info(f"No lyrics found for {track.display_name()}")
return None return None
@@ -211,9 +296,17 @@ class LrcManager:
track: TrackMeta, track: TrackMeta,
force_method: Optional[FetcherMethodType] = None, force_method: Optional[FetcherMethodType] = None,
bypass_cache: bool = False, bypass_cache: bool = False,
allow_unsynced: bool = False,
) -> Optional[LyricResult]: ) -> Optional[LyricResult]:
"""Fetch lyrics for *track* using the group-based parallel pipeline.""" """Fetch lyrics for *track* using the group-based parallel pipeline."""
return asyncio.run(self._fetch_for_track(track, force_method, bypass_cache)) return asyncio.run(
self._fetch_for_track(
track,
force_method,
bypass_cache,
allow_unsynced,
)
)
def manual_insert( def manual_insert(
self, self,
+29 -4
View File
@@ -6,8 +6,35 @@ Description: Base fetcher class and common interfaces.
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import Optional from typing import Optional
from dataclasses import dataclass
from ..models import TrackMeta, LyricResult from ..models import CacheStatus, TrackMeta, LyricResult
@dataclass(frozen=True, slots=True)
class FetchResult:
    """Immutable pair of outcomes from one fetcher call, one per lyric kind.

    Both slots default to None, so ``FetchResult()`` carries no outcome.
    """

    # Outcome for time-synced lyrics.
    synced: Optional[LyricResult] = None
    # Outcome for unsynced (plain) lyrics.
    unsynced: Optional[LyricResult] = None

    @staticmethod
    def from_not_found() -> "FetchResult":
        """Build a result whose two slots both report NOT_FOUND."""
        status = CacheStatus.NOT_FOUND
        # Each slot gets its own LyricResult instance.
        synced_slot = LyricResult(status=status, lyrics=None, source=None)
        unsynced_slot = LyricResult(status=status, lyrics=None, source=None)
        return FetchResult(synced=synced_slot, unsynced=unsynced_slot)

    @staticmethod
    def from_network_error() -> "FetchResult":
        """Build a result whose two slots both report NETWORK_ERROR."""
        status = CacheStatus.NETWORK_ERROR
        # Each slot gets its own LyricResult instance.
        synced_slot = LyricResult(status=status, lyrics=None, source=None)
        unsynced_slot = LyricResult(status=status, lyrics=None, source=None)
        return FetchResult(synced=synced_slot, unsynced=unsynced_slot)
class BaseFetcher(ABC): class BaseFetcher(ABC):
@@ -28,8 +55,6 @@ class BaseFetcher(ABC):
pass pass
@abstractmethod
async def fetch(self, track: TrackMeta, bypass_cache: bool = False) -> FetchResult:
    """Fetch lyrics for the given track.

    Args:
        track: Metadata of the track to look up.
        bypass_cache: Caller's request to skip cached data for this call.

    Returns:
        A FetchResult with separate ``synced`` and ``unsynced`` slots.
        Return an empty ``FetchResult()`` (both slots None) rather than
        None when there is nothing to report.
    """
    pass
+58 -36
View File
@@ -12,7 +12,7 @@ from typing import Optional
from loguru import logger from loguru import logger
from .base import BaseFetcher from .base import BaseFetcher, FetchResult
from .selection import SearchCandidate, select_best from .selection import SearchCandidate, select_best
from ..models import TrackMeta, LyricResult, CacheStatus from ..models import TrackMeta, LyricResult, CacheStatus
from ..cache import CacheEngine from ..cache import CacheEngine
@@ -34,61 +34,50 @@ class CacheSearchFetcher(BaseFetcher):
def is_available(self, track: TrackMeta) -> bool: def is_available(self, track: TrackMeta) -> bool:
return bool(track.title) return bool(track.title)
def _get_exact(self, track: TrackMeta, synced: bool) -> Optional[LyricResult]:
    """Return the exact-metadata cache hit of the requested sync kind, if any.

    The lookup is delegated to ``find_best_positive`` with the matching
    positive status. A hit without lyrics is treated as a miss.
    """
    if synced:
        wanted = CacheStatus.SUCCESS_SYNCED
    else:
        wanted = CacheStatus.SUCCESS_UNSYNCED

    exact = self._cache.find_best_positive(track, wanted)
    if not exact or exact.lyrics is None:
        return None

    logger.info(
        f"Cache-search: exact {'synced' if synced else 'unsynced'} hit ({exact.status.value})"
    )
    return exact
# Pick best by confidence scoring def _get_fuzzy(
candidates = [ self, matches: list, track: TrackMeta, synced: bool
) -> Optional[LyricResult]:
filtered = [
SearchCandidate( SearchCandidate(
item=m, item=m,
duration_ms=float(m["length"]) if m.get("length") else None, duration_ms=float(m["length"]) if m.get("length") else None,
is_synced=m.get("status") == CacheStatus.SUCCESS_SYNCED.value, is_synced=synced,
title=m.get("title"), title=m.get("title"),
artist=m.get("artist"), artist=m.get("artist"),
album=m.get("album"), album=m.get("album"),
) )
for m in matches for m in matches
if m.get("lyrics") if m.get("lyrics")
and (synced and m.get("status") == CacheStatus.SUCCESS_SYNCED.value)
or (not synced and m.get("status") == CacheStatus.SUCCESS_UNSYNCED.value)
] ]
best, confidence = select_best( best, confidence = select_best(
candidates, filtered,
track.length, track.length,
title=track.title, title=track.title,
artist=track.artist, artist=track.artist,
album=track.album, album=track.album,
) )
if best and best.get("lyrics") is not None:
if not best: status = (
return None CacheStatus.SUCCESS_SYNCED if synced else CacheStatus.SUCCESS_UNSYNCED
)
status = CacheStatus(best["status"])
logger.info( logger.info(
f"Cache-search: fuzzy hit from [{best.get('source')}] " f"Cache-search: fuzzy {'synced' if synced else 'unsynced'} hit from "
f"album={best.get('album')!r} ({status.value}, confidence={confidence:.0f})" f"[{best.get('source')}] album={best.get('album')!r} (confidence={confidence:.0f})"
) )
return LyricResult( return LyricResult(
status=status, status=status,
@@ -96,3 +85,36 @@ class CacheSearchFetcher(BaseFetcher):
source=self.source_name, source=self.source_name,
confidence=confidence, confidence=confidence,
) )
return None
async def fetch(self, track: TrackMeta, bypass_cache: bool = False) -> FetchResult:
    """Search the local cache for synced and unsynced lyrics for *track*.

    Exact metadata matches are tried first for both kinds. Any kind still
    missing is then looked up through the fuzzy title/length search. An
    empty FetchResult is returned when the caller bypasses the cache or the
    track has no title.
    """
    if bypass_cache:
        logger.debug("Cache-search: bypassed by caller")
        return FetchResult()

    if not track.title:
        logger.debug("Cache-search: skipped — no title")
        return FetchResult()

    # Fast path: exact metadata match, one lookup per lyric kind.
    synced_hit: Optional[LyricResult] = self._get_exact(track, synced=True)
    unsynced_hit: Optional[LyricResult] = self._get_exact(track, synced=False)

    if not (synced_hit and unsynced_hit):
        # Slow path: fuzzy cross-album search, only for the kinds still missing.
        matches = self._cache.search_by_meta(title=track.title, length=track.length)
        if matches:
            if not synced_hit:
                synced_hit = self._get_fuzzy(matches, track, synced=True)
            if not unsynced_hit:
                unsynced_hit = self._get_fuzzy(matches, track, synced=False)
        else:
            logger.debug(f"Cache-search: no match for {track.display_name()}")

    return FetchResult(synced=synced_hit, unsynced=unsynced_hit)
+28 -13
View File
@@ -12,8 +12,8 @@ from loguru import logger
from mutagen._file import File from mutagen._file import File
from mutagen.flac import FLAC from mutagen.flac import FLAC
from .base import BaseFetcher from .base import BaseFetcher, FetchResult
from ..models import TrackMeta, LyricResult from ..models import CacheStatus, TrackMeta, LyricResult
from ..lrc import get_audio_path, get_sidecar_path, LRCData from ..lrc import get_audio_path, get_sidecar_path, LRCData
@@ -25,17 +25,18 @@ class LocalFetcher(BaseFetcher):
def is_available(self, track: TrackMeta) -> bool: def is_available(self, track: TrackMeta) -> bool:
return track.is_local return track.is_local
async def fetch( async def fetch(self, track: TrackMeta, bypass_cache: bool = False) -> FetchResult:
self, track: TrackMeta, bypass_cache: bool = False
) -> Optional[LyricResult]:
"""Attempt to read lyrics from local filesystem.""" """Attempt to read lyrics from local filesystem."""
if not track.is_local or not track.url: if not track.is_local or not track.url:
return None return FetchResult()
audio_path = get_audio_path(track.url, ensure_exists=False) audio_path = get_audio_path(track.url, ensure_exists=False)
if not audio_path: if not audio_path:
logger.debug(f"Local: audio URL is not a valid file path: {track.url}") logger.debug(f"Local: audio URL is not a valid file path: {track.url}")
return None return FetchResult()
synced_result: Optional[LyricResult] = None
unsynced_result: Optional[LyricResult] = None
lrc_path = get_sidecar_path( lrc_path = get_sidecar_path(
track.url, ensure_audio_exists=False, ensure_exists=True track.url, ensure_audio_exists=False, ensure_exists=True
@@ -50,11 +51,19 @@ class LocalFetcher(BaseFetcher):
logger.info( logger.info(
f"Local: found .lrc sidecar ({status.value}) for {audio_path.name}" f"Local: found .lrc sidecar ({status.value}) for {audio_path.name}"
) )
return LyricResult( if status == CacheStatus.SUCCESS_SYNCED:
synced_result = LyricResult(
status=status, status=status,
lyrics=lrc, lyrics=lrc,
source=self.source_name, source=f"{self.source_name} (sidecar)",
) )
else:
unsynced_result = LyricResult(
status=status,
lyrics=lrc,
source=f"{self.source_name} (sidecar)",
)
except Exception as e: except Exception as e:
logger.error(f"Local: error reading {lrc_path}: {e}") logger.error(f"Local: error reading {lrc_path}: {e}")
else: else:
@@ -63,7 +72,7 @@ class LocalFetcher(BaseFetcher):
# Embedded metadata # Embedded metadata
if not audio_path.exists(): if not audio_path.exists():
logger.debug(f"Local: audio file does not exist: {audio_path}") logger.debug(f"Local: audio file does not exist: {audio_path}")
return None else:
try: try:
audio = File(audio_path) audio = File(audio_path)
if audio is not None: if audio is not None:
@@ -87,7 +96,14 @@ class LocalFetcher(BaseFetcher):
logger.info( logger.info(
f"Local: found embedded lyrics ({status.value}) for {audio_path.name}" f"Local: found embedded lyrics ({status.value}) for {audio_path.name}"
) )
return LyricResult( if status == CacheStatus.SUCCESS_SYNCED and not synced_result:
synced_result = LyricResult(
status=status,
lyrics=lrc,
source=f"{self.source_name} (embedded)",
)
elif not unsynced_result:
unsynced_result = LyricResult(
status=status, status=status,
lyrics=lrc, lyrics=lrc,
source=f"{self.source_name} (embedded)", source=f"{self.source_name} (embedded)",
@@ -97,5 +113,4 @@ class LocalFetcher(BaseFetcher):
except Exception as e: except Exception as e:
logger.error(f"Local: error reading metadata for {audio_path}: {e}") logger.error(f"Local: error reading metadata for {audio_path}: {e}")
logger.debug(f"Local: no lyrics found for {audio_path}") return FetchResult(synced=synced_result, unsynced=unsynced_result)
return None
+21 -22
View File
@@ -5,19 +5,17 @@ Description: LRCLIB fetcher — queries lrclib.net for synced/plain lyrics.
Requires complete track metadata (artist, title, album, duration). Requires complete track metadata (artist, title, album, duration).
""" """
from typing import Optional
import httpx import httpx
from loguru import logger from loguru import logger
from urllib.parse import urlencode from urllib.parse import urlencode
from .base import BaseFetcher from .base import BaseFetcher, FetchResult
from ..models import TrackMeta, LyricResult, CacheStatus from ..models import TrackMeta, LyricResult, CacheStatus
from ..lrc import LRCData from ..lrc import LRCData
from ..config import ( from ..config import (
HTTP_TIMEOUT, HTTP_TIMEOUT,
TTL_UNSYNCED, TTL_UNSYNCED,
TTL_NOT_FOUND, TTL_NOT_FOUND,
TTL_NETWORK_ERROR,
UA_LRX, UA_LRX,
) )
@@ -32,13 +30,11 @@ class LrclibFetcher(BaseFetcher):
def is_available(self, track: TrackMeta) -> bool: def is_available(self, track: TrackMeta) -> bool:
return track.is_complete return track.is_complete
async def fetch( async def fetch(self, track: TrackMeta, bypass_cache: bool = False) -> FetchResult:
self, track: TrackMeta, bypass_cache: bool = False
) -> Optional[LyricResult]:
"""Fetch lyrics from LRCLIB. Requires complete metadata.""" """Fetch lyrics from LRCLIB. Requires complete metadata."""
if not track.is_complete: if not track.is_complete:
logger.debug("LRCLIB: skipped — incomplete metadata") logger.debug("LRCLIB: skipped — incomplete metadata")
return None return FetchResult()
params = { params = {
"track_name": track.title, "track_name": track.title,
@@ -55,48 +51,51 @@ class LrclibFetcher(BaseFetcher):
if resp.status_code == 404: if resp.status_code == 404:
logger.debug(f"LRCLIB: not found for {track.display_name()}") logger.debug(f"LRCLIB: not found for {track.display_name()}")
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND) return FetchResult.from_not_found()
if resp.status_code != 200: if resp.status_code != 200:
logger.error(f"LRCLIB: API returned {resp.status_code}") logger.error(f"LRCLIB: API returned {resp.status_code}")
return LyricResult( return FetchResult.from_network_error()
status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR
)
data = resp.json() data = resp.json()
if not isinstance(data, dict): if not isinstance(data, dict):
logger.error(f"LRCLIB: unexpected response type: {type(data).__name__}") logger.error(f"LRCLIB: unexpected response type: {type(data).__name__}")
return LyricResult( return FetchResult.from_network_error()
status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR
)
synced = data.get("syncedLyrics") synced = data.get("syncedLyrics")
unsynced = data.get("plainLyrics") unsynced = data.get("plainLyrics")
res_synced: LyricResult = LyricResult(
status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND
)
res_unsynced: LyricResult = LyricResult(
status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND
)
if isinstance(synced, str) and synced.strip(): if isinstance(synced, str) and synced.strip():
lyrics = LRCData(synced) lyrics = LRCData(synced)
logger.info(f"LRCLIB: got synced lyrics ({len(lyrics)} lines)") logger.info(f"LRCLIB: got synced lyrics ({len(lyrics)} lines)")
return LyricResult( res_synced = LyricResult(
status=CacheStatus.SUCCESS_SYNCED, status=CacheStatus.SUCCESS_SYNCED,
lyrics=lyrics, lyrics=lyrics,
source=self.source_name, source=self.source_name,
) )
elif isinstance(unsynced, str) and unsynced.strip():
if isinstance(unsynced, str) and unsynced.strip():
lyrics = LRCData(unsynced) lyrics = LRCData(unsynced)
logger.info(f"LRCLIB: got unsynced lyrics ({len(lyrics)} lines)") logger.info(f"LRCLIB: got unsynced lyrics ({len(lyrics)} lines)")
return LyricResult( res_unsynced = LyricResult(
status=CacheStatus.SUCCESS_UNSYNCED, status=CacheStatus.SUCCESS_UNSYNCED,
lyrics=lyrics, lyrics=lyrics,
source=self.source_name, source=self.source_name,
ttl=TTL_UNSYNCED, ttl=TTL_UNSYNCED,
) )
else:
logger.debug(f"LRCLIB: empty response for {track.display_name()}") return FetchResult(synced=res_synced, unsynced=res_unsynced)
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
except httpx.HTTPError as e: except httpx.HTTPError as e:
logger.error(f"LRCLIB: HTTP error: {e}") logger.error(f"LRCLIB: HTTP error: {e}")
return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR) return FetchResult.from_network_error()
except Exception as e: except Exception as e:
logger.error(f"LRCLIB: unexpected error: {e}") logger.error(f"LRCLIB: unexpected error: {e}")
return None return FetchResult()
+21 -20
View File
@@ -7,11 +7,10 @@ Description: LRCLIB search fetcher — fuzzy search via lrclib.net /api/search.
import asyncio import asyncio
import httpx import httpx
from typing import Optional
from loguru import logger from loguru import logger
from urllib.parse import urlencode from urllib.parse import urlencode
from .base import BaseFetcher from .base import BaseFetcher, FetchResult
from .selection import SearchCandidate, select_best from .selection import SearchCandidate, select_best
from ..models import TrackMeta, LyricResult, CacheStatus from ..models import TrackMeta, LyricResult, CacheStatus
from ..lrc import LRCData from ..lrc import LRCData
@@ -19,7 +18,6 @@ from ..config import (
HTTP_TIMEOUT, HTTP_TIMEOUT,
TTL_UNSYNCED, TTL_UNSYNCED,
TTL_NOT_FOUND, TTL_NOT_FOUND,
TTL_NETWORK_ERROR,
UA_LRX, UA_LRX,
) )
@@ -62,12 +60,10 @@ class LrclibSearchFetcher(BaseFetcher):
return queries return queries
async def fetch( async def fetch(self, track: TrackMeta, bypass_cache: bool = False) -> FetchResult:
self, track: TrackMeta, bypass_cache: bool = False
) -> Optional[LyricResult]:
if not track.title: if not track.title:
logger.debug("LRCLIB-search: skipped — no title") logger.debug("LRCLIB-search: skipped — no title")
return None return FetchResult()
queries = self._build_queries(track) queries = self._build_queries(track)
logger.info(f"LRCLIB-search: searching for {track.display_name()}") logger.info(f"LRCLIB-search: searching for {track.display_name()}")
@@ -110,11 +106,9 @@ class LrclibSearchFetcher(BaseFetcher):
if not candidates: if not candidates:
if had_error: if had_error:
return LyricResult( return FetchResult.from_network_error()
status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR
)
logger.debug(f"LRCLIB-search: no results for {track.display_name()}") logger.debug(f"LRCLIB-search: no results for {track.display_name()}")
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND) return FetchResult.from_not_found()
logger.debug( logger.debug(
f"LRCLIB-search: got {len(candidates)} unique candidates " f"LRCLIB-search: got {len(candidates)} unique candidates "
@@ -144,41 +138,48 @@ class LrclibSearchFetcher(BaseFetcher):
) )
if best is None: if best is None:
logger.debug("LRCLIB-search: no valid candidate found") logger.debug("LRCLIB-search: no valid candidate found")
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND) return FetchResult.from_not_found()
synced = best.get("syncedLyrics") synced = best.get("syncedLyrics")
unsynced = best.get("plainLyrics") unsynced = best.get("plainLyrics")
res_synced: LyricResult = LyricResult(
status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND
)
res_unsynced: LyricResult = LyricResult(
status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND
)
if isinstance(synced, str) and synced.strip(): if isinstance(synced, str) and synced.strip():
lyrics = LRCData(synced) lyrics = LRCData(synced)
logger.info( logger.info(
f"LRCLIB-search: got synced lyrics ({len(lyrics)} lines, confidence={confidence:.0f})" f"LRCLIB-search: got synced lyrics ({len(lyrics)} lines, confidence={confidence:.0f})"
) )
return LyricResult( res_synced = LyricResult(
status=CacheStatus.SUCCESS_SYNCED, status=CacheStatus.SUCCESS_SYNCED,
lyrics=lyrics, lyrics=lyrics,
source=self.source_name, source=self.source_name,
confidence=confidence, confidence=confidence,
) )
elif isinstance(unsynced, str) and unsynced.strip():
if isinstance(unsynced, str) and unsynced.strip():
lyrics = LRCData(unsynced) lyrics = LRCData(unsynced)
logger.info( logger.info(
f"LRCLIB-search: got unsynced lyrics ({len(lyrics)} lines, confidence={confidence:.0f})" f"LRCLIB-search: got unsynced lyrics ({len(lyrics)} lines, confidence={confidence:.0f})"
) )
return LyricResult( res_unsynced = LyricResult(
status=CacheStatus.SUCCESS_UNSYNCED, status=CacheStatus.SUCCESS_UNSYNCED,
lyrics=lyrics, lyrics=lyrics,
source=self.source_name, source=self.source_name,
ttl=TTL_UNSYNCED, ttl=TTL_UNSYNCED,
confidence=confidence, confidence=confidence,
) )
else:
logger.debug("LRCLIB-search: best candidate has empty lyrics") return FetchResult(synced=res_synced, unsynced=res_unsynced)
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
except httpx.HTTPError as e: except httpx.HTTPError as e:
logger.error(f"LRCLIB-search: HTTP error: {e}") logger.error(f"LRCLIB-search: HTTP error: {e}")
return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR) return FetchResult.from_network_error()
except Exception as e: except Exception as e:
logger.error(f"LRCLIB-search: unexpected error: {e}") logger.error(f"LRCLIB-search: unexpected error: {e}")
return None return FetchResult()
+21 -17
View File
@@ -15,12 +15,11 @@ import json
from typing import Optional from typing import Optional
from loguru import logger from loguru import logger
from .base import BaseFetcher from .base import BaseFetcher, FetchResult
from .selection import SearchCandidate, select_best from .selection import SearchCandidate, select_best
from ..authenticators.musixmatch import MusixmatchAuthenticator from ..authenticators.musixmatch import MusixmatchAuthenticator
from ..lrc import LRCData from ..lrc import LRCData
from ..models import CacheStatus, LyricResult, TrackMeta from ..models import CacheStatus, LyricResult, TrackMeta
from ..config import TTL_NETWORK_ERROR, TTL_NOT_FOUND
_MUSIXMATCH_MACRO_URL = "https://apic-desktop.musixmatch.com/ws/1.1/macro.subtitles.get" _MUSIXMATCH_MACRO_URL = "https://apic-desktop.musixmatch.com/ws/1.1/macro.subtitles.get"
_MUSIXMATCH_SEARCH_URL = "https://apic-desktop.musixmatch.com/ws/1.1/track.search" _MUSIXMATCH_SEARCH_URL = "https://apic-desktop.musixmatch.com/ws/1.1/track.search"
@@ -156,9 +155,7 @@ class MusixmatchSpotifyFetcher(BaseFetcher):
def is_available(self, track: TrackMeta) -> bool: def is_available(self, track: TrackMeta) -> bool:
return bool(track.trackid) and not self.auth.is_cooldown() return bool(track.trackid) and not self.auth.is_cooldown()
async def fetch( async def fetch(self, track: TrackMeta, bypass_cache: bool = False) -> FetchResult:
self, track: TrackMeta, bypass_cache: bool = False
) -> Optional[LyricResult]:
logger.info(f"Musixmatch-Spotify: fetching lyrics for {track.display_name()}") logger.info(f"Musixmatch-Spotify: fetching lyrics for {track.display_name()}")
try: try:
@@ -167,22 +164,27 @@ class MusixmatchSpotifyFetcher(BaseFetcher):
{"track_spotify_id": track.trackid}, # type: ignore[dict-item] {"track_spotify_id": track.trackid}, # type: ignore[dict-item]
) )
except AttributeError: except AttributeError:
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND) return FetchResult.from_not_found()
except Exception as e: except Exception as e:
logger.error(f"Musixmatch-Spotify: fetch failed: {e}") logger.error(f"Musixmatch-Spotify: fetch failed: {e}")
return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR) return FetchResult.from_network_error()
if lrc is None: if lrc is None:
logger.debug( logger.debug(
f"Musixmatch-Spotify: no lyrics found for {track.display_name()}" f"Musixmatch-Spotify: no lyrics found for {track.display_name()}"
) )
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND) return FetchResult.from_not_found()
logger.info(f"Musixmatch-Spotify: got SUCCESS_SYNCED lyrics ({len(lrc)} lines)") logger.info(f"Musixmatch-Spotify: got SUCCESS_SYNCED lyrics ({len(lrc)} lines)")
return LyricResult( return FetchResult(
synced=LyricResult(
status=CacheStatus.SUCCESS_SYNCED, status=CacheStatus.SUCCESS_SYNCED,
lyrics=lrc, lyrics=lrc,
source=self.source_name, source=self.source_name,
),
# Fetching unsynced lyrics is not possible with current endpoint,
# so no need to cache NOT_FOUND to avoid repeated failed attempts
unsynced=None,
) )
@@ -258,38 +260,40 @@ class MusixmatchFetcher(BaseFetcher):
logger.debug("Musixmatch: no suitable candidate found") logger.debug("Musixmatch: no suitable candidate found")
return best_id, confidence return best_id, confidence
async def fetch( async def fetch(self, track: TrackMeta, bypass_cache: bool = False) -> FetchResult:
self, track: TrackMeta, bypass_cache: bool = False
) -> Optional[LyricResult]:
logger.info(f"Musixmatch: fetching lyrics for {track.display_name()}") logger.info(f"Musixmatch: fetching lyrics for {track.display_name()}")
try: try:
commontrack_id, confidence = await self._search(track) commontrack_id, confidence = await self._search(track)
if commontrack_id is None: if commontrack_id is None:
logger.debug(f"Musixmatch: no match found for {track.display_name()}") logger.debug(f"Musixmatch: no match found for {track.display_name()}")
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND) return FetchResult.from_not_found()
lrc = await _fetch_macro( lrc = await _fetch_macro(
self.auth, self.auth,
{"commontrack_id": str(commontrack_id)}, {"commontrack_id": str(commontrack_id)},
) )
except AttributeError: except AttributeError:
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND) return FetchResult.from_not_found()
except Exception as e: except Exception as e:
logger.error(f"Musixmatch: fetch failed: {e}") logger.error(f"Musixmatch: fetch failed: {e}")
return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR) return FetchResult.from_network_error()
if lrc is None: if lrc is None:
logger.debug(f"Musixmatch: no lyrics for commontrack_id={commontrack_id}") logger.debug(f"Musixmatch: no lyrics for commontrack_id={commontrack_id}")
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND) return FetchResult.from_not_found()
logger.info( logger.info(
f"Musixmatch: got SUCCESS_SYNCED lyrics " f"Musixmatch: got SUCCESS_SYNCED lyrics "
f"for commontrack_id={commontrack_id} ({len(lrc)} lines)" f"for commontrack_id={commontrack_id} ({len(lrc)} lines)"
) )
return LyricResult( return FetchResult(
synced=LyricResult(
status=CacheStatus.SUCCESS_SYNCED, status=CacheStatus.SUCCESS_SYNCED,
lyrics=lrc, lyrics=lrc,
source=self.source_name, source=self.source_name,
confidence=confidence, confidence=confidence,
),
# Same as above
unsynced=None,
) )
+56 -22
View File
@@ -8,18 +8,16 @@ Description: Netease Cloud Music fetcher.
""" """
import asyncio import asyncio
from typing import Optional
import httpx import httpx
from loguru import logger from loguru import logger
from .base import BaseFetcher from .base import BaseFetcher, FetchResult
from .selection import SearchCandidate, select_ranked from .selection import SearchCandidate, select_ranked
from ..models import TrackMeta, LyricResult, CacheStatus from ..models import TrackMeta, LyricResult, CacheStatus
from ..lrc import LRCData from ..lrc import LRCData
from ..config import ( from ..config import (
HTTP_TIMEOUT, HTTP_TIMEOUT,
TTL_NOT_FOUND, TTL_NOT_FOUND,
TTL_NETWORK_ERROR,
MULTI_CANDIDATE_DELAY_S, MULTI_CANDIDATE_DELAY_S,
UA_BROWSER, UA_BROWSER,
) )
@@ -112,9 +110,7 @@ class NeteaseFetcher(BaseFetcher):
logger.error(f"Netease: search failed: {e}") logger.error(f"Netease: search failed: {e}")
return [] return []
async def _get_lyric( async def _get_lyric(self, song_id: int, confidence: float = 0.0) -> FetchResult:
self, song_id: int, confidence: float = 0.0
) -> Optional[LyricResult]:
logger.debug(f"Netease: fetching lyrics for song_id={song_id}") logger.debug(f"Netease: fetching lyrics for song_id={song_id}")
try: try:
@@ -141,21 +137,19 @@ class NeteaseFetcher(BaseFetcher):
logger.error( logger.error(
f"Netease: lyric response is not dict: {type(data).__name__}" f"Netease: lyric response is not dict: {type(data).__name__}"
) )
return LyricResult( return FetchResult.from_network_error()
status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR
)
lrc_obj = data.get("lrc") lrc_obj = data.get("lrc")
if not isinstance(lrc_obj, dict): if not isinstance(lrc_obj, dict):
logger.debug( logger.debug(
f"Netease: no 'lrc' object in response for song_id={song_id}" f"Netease: no 'lrc' object in response for song_id={song_id}"
) )
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND) return FetchResult.from_not_found()
lrc: str = lrc_obj.get("lyric", "") lrc: str = lrc_obj.get("lyric", "")
if not isinstance(lrc, str) or not lrc.strip(): if not isinstance(lrc, str) or not lrc.strip():
logger.debug(f"Netease: empty lyrics for song_id={song_id}") logger.debug(f"Netease: empty lyrics for song_id={song_id}")
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND) return FetchResult.from_not_found()
lrcdata = LRCData(lrc) lrcdata = LRCData(lrc)
status = lrcdata.detect_sync_status() status = lrcdata.detect_sync_status()
@@ -163,38 +157,78 @@ class NeteaseFetcher(BaseFetcher):
f"Netease: got {status.value} lyrics for song_id={song_id} " f"Netease: got {status.value} lyrics for song_id={song_id} "
f"({len(lrcdata)} lines)" f"({len(lrcdata)} lines)"
) )
return LyricResult( not_found = LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
status=status, if status == CacheStatus.SUCCESS_SYNCED:
return FetchResult(
synced=LyricResult(
status=CacheStatus.SUCCESS_SYNCED,
lyrics=lrcdata, lyrics=lrcdata,
source=self.source_name, source=self.source_name,
confidence=confidence, confidence=confidence,
),
unsynced=not_found,
)
return FetchResult(
synced=not_found,
unsynced=LyricResult(
status=CacheStatus.SUCCESS_UNSYNCED,
lyrics=lrcdata,
source=self.source_name,
confidence=confidence,
),
) )
except Exception as e: except Exception as e:
logger.error(f"Netease: lyric fetch failed for song_id={song_id}: {e}") logger.error(f"Netease: lyric fetch failed for song_id={song_id}: {e}")
return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR) return FetchResult.from_network_error()
async def fetch( async def fetch(self, track: TrackMeta, bypass_cache: bool = False) -> FetchResult:
self, track: TrackMeta, bypass_cache: bool = False
) -> Optional[LyricResult]:
query = f"{track.artist or ''} {track.title or ''}".strip() query = f"{track.artist or ''} {track.title or ''}".strip()
if not query: if not query:
logger.debug("Netease: skipped — insufficient metadata") logger.debug("Netease: skipped — insufficient metadata")
return None return FetchResult()
logger.info(f"Netease: fetching lyrics for {track.display_name()}") logger.info(f"Netease: fetching lyrics for {track.display_name()}")
candidates = await self._search(track) candidates = await self._search(track)
if not candidates: if not candidates:
logger.debug(f"Netease: no match found for {track.display_name()}") logger.debug(f"Netease: no match found for {track.display_name()}")
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND) return FetchResult.from_not_found()
res_synced: LyricResult = LyricResult(
status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND
)
res_unsynced: LyricResult = LyricResult(
status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND
)
for i, (song_id, confidence) in enumerate(candidates): for i, (song_id, confidence) in enumerate(candidates):
if i > 0: if i > 0:
await asyncio.sleep(MULTI_CANDIDATE_DELAY_S) await asyncio.sleep(MULTI_CANDIDATE_DELAY_S)
result = await self._get_lyric(song_id, confidence=confidence) result = await self._get_lyric(song_id, confidence=confidence)
if result is None or result.status == CacheStatus.NETWORK_ERROR: if result.synced and result.synced.status == CacheStatus.NETWORK_ERROR:
return result return result
if result.status != CacheStatus.NOT_FOUND: if result.unsynced and result.unsynced.status == CacheStatus.NETWORK_ERROR:
return result return result
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND) if (
res_synced.status == CacheStatus.NOT_FOUND
and result.synced
and result.synced.status == CacheStatus.SUCCESS_SYNCED
):
res_synced = result.synced
if (
res_unsynced.status == CacheStatus.NOT_FOUND
and result.unsynced
and result.unsynced.status == CacheStatus.SUCCESS_UNSYNCED
):
res_unsynced = result.unsynced
# Netease API is quite expensive, so we stop after finding synced lyrics,
# instead of trying to find both synced and unsynced versions
if (
res_synced.status == CacheStatus.SUCCESS_SYNCED
# and res_unsynced.status == CacheStatus.SUCCESS_UNSYNCED
):
break
return FetchResult(synced=res_synced, unsynced=res_unsynced)
+56 -22
View File
@@ -10,18 +10,16 @@ Description: QQ Music fetcher via self-hosted API proxy.
""" """
import asyncio import asyncio
from typing import Optional
import httpx import httpx
from loguru import logger from loguru import logger
from .base import BaseFetcher from .base import BaseFetcher, FetchResult
from .selection import SearchCandidate, select_ranked from .selection import SearchCandidate, select_ranked
from ..models import TrackMeta, LyricResult, CacheStatus from ..models import TrackMeta, LyricResult, CacheStatus
from ..lrc import LRCData from ..lrc import LRCData
from ..config import ( from ..config import (
HTTP_TIMEOUT, HTTP_TIMEOUT,
TTL_NOT_FOUND, TTL_NOT_FOUND,
TTL_NETWORK_ERROR,
MULTI_CANDIDATE_DELAY_S, MULTI_CANDIDATE_DELAY_S,
) )
@@ -104,9 +102,7 @@ class QQMusicFetcher(BaseFetcher):
logger.error(f"QQMusic: search failed: {e}") logger.error(f"QQMusic: search failed: {e}")
return [] return []
async def _get_lyric( async def _get_lyric(self, mid: str, confidence: float = 0.0) -> FetchResult:
self, mid: str, confidence: float = 0.0
) -> Optional[LyricResult]:
logger.debug(f"QQMusic: fetching lyrics for mid={mid}") logger.debug(f"QQMusic: fetching lyrics for mid={mid}")
try: try:
@@ -120,56 +116,94 @@ class QQMusicFetcher(BaseFetcher):
if data.get("code") != 0: if data.get("code") != 0:
logger.error(f"QQMusic: lyric API error: {data}") logger.error(f"QQMusic: lyric API error: {data}")
return LyricResult( return FetchResult.from_network_error()
status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR
)
lrc = data.get("data", {}).get("lyric", "") lrc = data.get("data", {}).get("lyric", "")
if not isinstance(lrc, str) or not lrc.strip(): if not isinstance(lrc, str) or not lrc.strip():
logger.debug(f"QQMusic: empty lyrics for mid={mid}") logger.debug(f"QQMusic: empty lyrics for mid={mid}")
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND) return FetchResult.from_not_found()
lrcdata = LRCData(lrc) lrcdata = LRCData(lrc)
status = lrcdata.detect_sync_status() status = lrcdata.detect_sync_status()
logger.info( logger.info(
f"QQMusic: got {status.value} lyrics for mid={mid} ({len(lrcdata)} lines)" f"QQMusic: got {status.value} lyrics for mid={mid} ({len(lrcdata)} lines)"
) )
return LyricResult( not_found = LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
status=status, if status == CacheStatus.SUCCESS_SYNCED:
return FetchResult(
synced=LyricResult(
status=CacheStatus.SUCCESS_SYNCED,
lyrics=lrcdata, lyrics=lrcdata,
source=self.source_name, source=self.source_name,
confidence=confidence, confidence=confidence,
),
unsynced=not_found,
)
return FetchResult(
synced=not_found,
unsynced=LyricResult(
status=CacheStatus.SUCCESS_UNSYNCED,
lyrics=lrcdata,
source=self.source_name,
confidence=confidence,
),
) )
except Exception as e: except Exception as e:
logger.error(f"QQMusic: lyric fetch failed for mid={mid}: {e}") logger.error(f"QQMusic: lyric fetch failed for mid={mid}: {e}")
return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR) return FetchResult.from_network_error()
async def fetch( async def fetch(self, track: TrackMeta, bypass_cache: bool = False) -> FetchResult:
self, track: TrackMeta, bypass_cache: bool = False
) -> Optional[LyricResult]:
if not self.auth.is_configured(): if not self.auth.is_configured():
logger.debug("QQMusic: skipped — Auth not configured") logger.debug("QQMusic: skipped — Auth not configured")
return None return FetchResult()
query = f"{track.artist or ''} {track.title or ''}".strip() query = f"{track.artist or ''} {track.title or ''}".strip()
if not query: if not query:
logger.debug("QQMusic: skipped — insufficient metadata") logger.debug("QQMusic: skipped — insufficient metadata")
return None return FetchResult()
logger.info(f"QQMusic: fetching lyrics for {track.display_name()}") logger.info(f"QQMusic: fetching lyrics for {track.display_name()}")
candidates = await self._search(track) candidates = await self._search(track)
if not candidates: if not candidates:
logger.debug(f"QQMusic: no match found for {track.display_name()}") logger.debug(f"QQMusic: no match found for {track.display_name()}")
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND) return FetchResult.from_not_found()
res_synced: LyricResult = LyricResult(
status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND
)
res_unsynced: LyricResult = LyricResult(
status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND
)
for i, (mid, confidence) in enumerate(candidates): for i, (mid, confidence) in enumerate(candidates):
if i > 0: if i > 0:
await asyncio.sleep(MULTI_CANDIDATE_DELAY_S) await asyncio.sleep(MULTI_CANDIDATE_DELAY_S)
result = await self._get_lyric(mid, confidence=confidence) result = await self._get_lyric(mid, confidence=confidence)
if result is None or result.status == CacheStatus.NETWORK_ERROR: if result.synced and result.synced.status == CacheStatus.NETWORK_ERROR:
return result return result
if result.status != CacheStatus.NOT_FOUND: if result.unsynced and result.unsynced.status == CacheStatus.NETWORK_ERROR:
return result return result
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND) if (
res_synced.status == CacheStatus.NOT_FOUND
and result.synced
and result.synced.status == CacheStatus.SUCCESS_SYNCED
):
res_synced = result.synced
if (
res_unsynced.status == CacheStatus.NOT_FOUND
and result.unsynced
and result.unsynced.status == CacheStatus.SUCCESS_UNSYNCED
):
res_unsynced = result.unsynced
# QQMusic API is quite expensive, so we stop after finding synced lyrics,
# instead of trying to find both synced and unsynced versions
if (
res_synced.status == CacheStatus.SUCCESS_SYNCED
# and res_unsynced.status == CacheStatus.SUCCESS_UNSYNCED
):
break
return FetchResult(synced=res_synced, unsynced=res_unsynced)
+5 -4
View File
@@ -70,15 +70,13 @@ def _score_candidate(
Scoring works in two tiers: Scoring works in two tiers:
1. **Metadata score** — computed from fields available on *both* sides, Metadata score — computed from fields available on both sides,
then rescaled to fill the 0-90 range so that missing fields don't then rescaled to fill the 0-90 range so that missing fields don't
inflate the score. Fields missing on both sides are simply excluded inflate the score. Fields missing on both sides are simply excluded
from the calculation (neutral). Fields present on only one side from the calculation (neutral). Fields present on only one side
contribute 0 to the numerator but their weight still counts in the contribute 0 to the numerator but their weight still counts in the
denominator (penalty for asymmetric absence). denominator (penalty for asymmetric absence).
2. **Synced bonus** — a flat 10 pts, always applied independently.
Field weights (before rescaling): Field weights (before rescaling):
- Title: 40 - Title: 40
- Artist: 30 - Artist: 30
@@ -141,7 +139,10 @@ def _score_candidate(
metadata_score = 0.0 metadata_score = 0.0
# Synced bonus (always 10 pts, independent of metadata) # Synced bonus (always 10 pts, independent of metadata)
synced_score = _W_SYNCED if c.is_synced else 0.0 # synced_score = _W_SYNCED if c.is_synced else 0.0
# EDIT: synced or not should not affect the score that indicates metadata similarity.
# Always apply synced bonus regardless of is_synced.
synced_score = _W_SYNCED
return metadata_score + synced_score return metadata_score + synced_score
+28 -18
View File
@@ -5,14 +5,13 @@ Description: Spotify fetcher — obtains synced lyrics via Spotify's internal co
""" """
import httpx import httpx
from typing import Optional
from loguru import logger from loguru import logger
from .base import BaseFetcher from .base import BaseFetcher, FetchResult
from ..authenticators.spotify import SpotifyAuthenticator, SPOTIFY_BASE_HEADERS from ..authenticators.spotify import SpotifyAuthenticator, SPOTIFY_BASE_HEADERS
from ..models import TrackMeta, LyricResult, CacheStatus from ..models import TrackMeta, LyricResult, CacheStatus
from ..lrc import LRCData from ..lrc import LRCData
from ..config import HTTP_TIMEOUT, TTL_NOT_FOUND, TTL_NETWORK_ERROR from ..config import HTTP_TIMEOUT, TTL_NOT_FOUND
_SPOTIFY_LYRICS_URL = "https://spclient.wg.spotify.com/color-lyrics/v2/track/" _SPOTIFY_LYRICS_URL = "https://spclient.wg.spotify.com/color-lyrics/v2/track/"
@@ -46,19 +45,17 @@ class SpotifyFetcher(BaseFetcher):
continue continue
return False return False
async def fetch( async def fetch(self, track: TrackMeta, bypass_cache: bool = False) -> FetchResult:
self, track: TrackMeta, bypass_cache: bool = False
) -> Optional[LyricResult]:
if not track.trackid: if not track.trackid:
logger.debug("Spotify: skipped — no trackid in metadata") logger.debug("Spotify: skipped — no trackid in metadata")
return None return FetchResult()
logger.info(f"Spotify: fetching lyrics for trackid={track.trackid}") logger.info(f"Spotify: fetching lyrics for trackid={track.trackid}")
token = await self.auth.authenticate() token = await self.auth.authenticate()
if not token: if not token:
logger.error("Spotify: cannot fetch lyrics without a token") logger.error("Spotify: cannot fetch lyrics without a token")
return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR) return FetchResult.from_network_error()
url = f"{_SPOTIFY_LYRICS_URL}{track.trackid}?format=json&vocalRemoval=false&market=from_token" url = f"{_SPOTIFY_LYRICS_URL}{track.trackid}?format=json&vocalRemoval=false&market=from_token"
headers = { headers = {
@@ -73,21 +70,17 @@ class SpotifyFetcher(BaseFetcher):
if res.status_code == 404: if res.status_code == 404:
logger.debug(f"Spotify: 404 for trackid={track.trackid}") logger.debug(f"Spotify: 404 for trackid={track.trackid}")
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND) return FetchResult.from_not_found()
if res.status_code != 200: if res.status_code != 200:
logger.error(f"Spotify: lyrics API returned {res.status_code}") logger.error(f"Spotify: lyrics API returned {res.status_code}")
return LyricResult( return FetchResult.from_network_error()
status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR
)
data = res.json() data = res.json()
if not isinstance(data, dict) or "lyrics" not in data: if not isinstance(data, dict) or "lyrics" not in data:
logger.error("Spotify: unexpected lyrics response structure") logger.error("Spotify: unexpected lyrics response structure")
return LyricResult( return FetchResult.from_network_error()
status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR
)
lyrics_data = data["lyrics"] lyrics_data = data["lyrics"]
sync_type = lyrics_data.get("syncType", "") sync_type = lyrics_data.get("syncType", "")
@@ -95,7 +88,7 @@ class SpotifyFetcher(BaseFetcher):
if not isinstance(lines, list) or len(lines) == 0: if not isinstance(lines, list) or len(lines) == 0:
logger.debug("Spotify: response contained no lyric lines") logger.debug("Spotify: response contained no lyric lines")
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND) return FetchResult.from_not_found()
is_synced = sync_type == "LINE_SYNCED" and self._is_truly_synced(lines) is_synced = sync_type == "LINE_SYNCED" and self._is_truly_synced(lines)
@@ -122,8 +115,25 @@ class SpotifyFetcher(BaseFetcher):
) )
logger.info(f"Spotify: got {status.value} lyrics ({len(lrc_lines)} lines)") logger.info(f"Spotify: got {status.value} lyrics ({len(lrc_lines)} lines)")
return LyricResult(status=status, lyrics=content, source=self.source_name) not_found = LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
if is_synced:
return FetchResult(
synced=LyricResult(
status=CacheStatus.SUCCESS_SYNCED,
lyrics=content,
source=self.source_name,
),
unsynced=not_found,
)
return FetchResult(
synced=not_found,
unsynced=LyricResult(
status=CacheStatus.SUCCESS_UNSYNCED,
lyrics=content,
source=self.source_name,
),
)
except Exception as e: except Exception as e:
logger.error(f"Spotify: lyrics fetch failed: {e}") logger.error(f"Spotify: lyrics fetch failed: {e}")
return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR) return FetchResult.from_network_error()
+8 -2
View File
@@ -271,6 +271,9 @@ class LRCData:
return "\n".join(sorted_lines).strip() return "\n".join(sorted_lines).strip()
def to_unsynced(self):
    """Return a new ``LRCData`` built from this object's ``to_plain()`` text."""
    plain_text = self.to_plain()
    return LRCData(plain_text)
def to_lrc( def to_lrc(
self, self,
plain: bool = False, plain: bool = False,
@@ -279,9 +282,12 @@ class LRCData:
Assumes text has been normalized by normalize. Assumes text has been normalized by normalize.
""" """
ret = self
if not self.is_synced():
ret = self.normalize_unsynced()
if plain: if plain:
return self.to_plain() return ret.to_plain()
return "\n".join(self._lines) return "\n".join(ret._lines)
def get_audio_path(audio_url: str, ensure_exists: bool = False) -> Optional[Path]: def get_audio_path(audio_url: str, ensure_exists: bool = False) -> Optional[Path]:
-5
View File
@@ -10,8 +10,6 @@ from enum import Enum
from typing import Optional, TYPE_CHECKING from typing import Optional, TYPE_CHECKING
from dataclasses import dataclass from dataclasses import dataclass
from .config import SCORE_W_SYNCED
if TYPE_CHECKING: if TYPE_CHECKING:
from .lrc import LRCData from .lrc import LRCData
@@ -69,6 +67,3 @@ class LyricResult:
def __post_init__(self) -> None: def __post_init__(self) -> None:
if self.status in (CacheStatus.NOT_FOUND, CacheStatus.NETWORK_ERROR): if self.status in (CacheStatus.NOT_FOUND, CacheStatus.NETWORK_ERROR):
self.confidence = 0.0 self.confidence = 0.0
if self.status is CacheStatus.SUCCESS_UNSYNCED and self.confidence == 100.0:
# Fix: remove inflated confidence for unsynced results
self.confidence = 100 - SCORE_W_SYNCED
+80 -2
View File
@@ -66,6 +66,63 @@ def test_generate_key_raises_when_metadata_missing() -> None:
) )
def test_migrate_adds_confidence_version_and_boosts_unsynced(tmp_path: Path) -> None:
    """Opening a cache DB that lacks ``confidence_version`` migrates it in place.

    The seeded legacy table has no ``confidence_version`` column. After the
    database is opened through ``CacheEngine`` the test expects:
    - the ``confidence_version`` column to exist, set to 1 on every row
    - SUCCESS_UNSYNCED confidence raised by 10, capped at 100
    - SUCCESS_SYNCED confidence left untouched
    """
    # The SQL text is kept flush-left so the statement literals stay unchanged.
    legacy_schema = """
CREATE TABLE cache (
key TEXT PRIMARY KEY,
source TEXT NOT NULL,
status TEXT NOT NULL,
lyrics TEXT,
created_at INTEGER NOT NULL,
expires_at INTEGER,
artist TEXT,
title TEXT,
album TEXT,
length INTEGER,
confidence REAL
)
"""
    legacy_rows = """
INSERT INTO cache
(key, source, status, lyrics, created_at, expires_at, artist, title, album, length, confidence)
VALUES
('u1', 's1', 'SUCCESS_UNSYNCED', 'u1', 1, NULL, 'A', 'T', 'AL', 180000, 85.0),
('u2', 's2', 'SUCCESS_UNSYNCED', 'u2', 1, NULL, 'A', 'T', 'AL', 180000, 98.0),
('s1', 's3', 'SUCCESS_SYNCED', 's1', 1, NULL, 'A', 'T', 'AL', 180000, 70.0)
"""
    legacy_db = tmp_path / "legacy-cache.db"
    with sqlite3.connect(legacy_db) as seed_conn:
        seed_conn.execute(legacy_schema)
        seed_conn.execute(legacy_rows)
        seed_conn.commit()

    # Constructing the engine is the only step between seeding and checking,
    # so it is the step that must perform the migration.
    CacheEngine(str(legacy_db))

    with sqlite3.connect(legacy_db) as check_conn:
        table_info = check_conn.execute("PRAGMA table_info(cache)").fetchall()
        migrated = check_conn.execute(
            "SELECT key, status, confidence, confidence_version FROM cache ORDER BY key"
        ).fetchall()

    # PRAGMA table_info rows carry the column name at index 1.
    column_names = {info[1] for info in table_info}
    assert "confidence_version" in column_names

    by_key = {row[0]: tuple(row[1:]) for row in migrated}
    assert by_key["u1"] == ("SUCCESS_UNSYNCED", 95.0, 1)
    assert by_key["u2"] == ("SUCCESS_UNSYNCED", 100.0, 1)
    assert by_key["s1"] == ("SUCCESS_SYNCED", 70.0, 1)
def test_set_and_get_roundtrip_with_ttl( def test_set_and_get_roundtrip_with_ttl(
monkeypatch: pytest.MonkeyPatch, cache_db: CacheEngine monkeypatch: pytest.MonkeyPatch, cache_db: CacheEngine
) -> None: ) -> None:
@@ -218,7 +275,7 @@ def test_find_best_positive_uses_exact_match_and_prefers_synced(
cache_db.set(track, "s1", _result(CacheStatus.SUCCESS_UNSYNCED, "u", "s1")) cache_db.set(track, "s1", _result(CacheStatus.SUCCESS_UNSYNCED, "u", "s1"))
cache_db.set(track, "s2", _result(CacheStatus.SUCCESS_SYNCED, "s", "s2")) cache_db.set(track, "s2", _result(CacheStatus.SUCCESS_SYNCED, "s", "s2"))
best = cache_db.find_best_positive(track) best = cache_db.find_best_positive(track, CacheStatus.SUCCESS_SYNCED)
assert best is not None assert best is not None
assert best.status is CacheStatus.SUCCESS_SYNCED assert best.status is CacheStatus.SUCCESS_SYNCED
@@ -227,6 +284,26 @@ def test_find_best_positive_uses_exact_match_and_prefers_synced(
assert best.source == "cache-search" assert best.source == "cache-search"
def test_find_best_positive_returns_status_specific_results(
    cache_db: CacheEngine,
) -> None:
    """``find_best_positive`` returns the entry matching the requested status.

    The unsynced entry is given the higher confidence (95 vs 70), so a lookup
    for SUCCESS_SYNCED must still return the synced entry rather than the
    higher-scored unsynced one.
    """
    track = _track(artist="Artist", title="Song", album="Album")
    cache_db.set(track, "u-high", _result(CacheStatus.SUCCESS_UNSYNCED, "u", "u-high"))
    cache_db.set(track, "s-low", _result(CacheStatus.SUCCESS_SYNCED, "s", "s-low"))
    cache_db.update_confidence(track, 95.0, "u-high")
    cache_db.update_confidence(track, 70.0, "s-low")

    expectations = (
        (CacheStatus.SUCCESS_SYNCED, "s"),
        (CacheStatus.SUCCESS_UNSYNCED, "u"),
    )
    for wanted_status, expected_text in expectations:
        best = cache_db.find_best_positive(track, wanted_status)
        assert best is not None
        assert best.status is wanted_status
        assert str(best.lyrics) == expected_text
def test_search_by_meta_fuzzy_rules_and_duration_sorting(cache_db: CacheEngine) -> None: def test_search_by_meta_fuzzy_rules_and_duration_sorting(cache_db: CacheEngine) -> None:
# Same logical title/artist after normalization, different length quality. # Same logical title/artist after normalization, different length quality.
base = _track( base = _track(
@@ -296,6 +373,7 @@ def test_search_by_meta_fuzzy_rules_and_duration_sorting(cache_db: CacheEngine)
sources = [r["source"] for r in rows] sources = [r["source"] for r in rows]
assert "negative" not in sources assert "negative" not in sources
assert "far-len" not in sources assert "far-len" not in sources
assert "close-unsynced" in sources
# Sorted by duration diff, then confidence for equal diff. # Sorted by duration diff, then confidence for equal diff.
assert sources[0] == "seed" assert sources[0] == "seed"
assert sources[1] == "close-synced" assert sources[1] == "close-synced"
@@ -314,7 +392,7 @@ def test_update_confidence_targets_specific_source(cache_db: CacheEngine) -> Non
assert updated == 1 assert updated == 1
rows = {r["source"]: r for r in cache_db.query_track(track)} rows = {r["source"]: r for r in cache_db.query_track(track)}
assert rows["s1"]["confidence"] == 75.0 assert rows["s1"]["confidence"] == 75.0
assert rows["s2"]["confidence"] == 90.0 # unchanged (unsynced default) assert rows["s2"]["confidence"] == 100.0 # unchanged default
def test_update_confidence_returns_zero_for_missing_source( def test_update_confidence_returns_zero_for_missing_source(
+12 -5
View File
@@ -37,9 +37,14 @@ def lrc_manager(tmp_path: Path) -> LrcManager:
def _fetch_and_assert( def _fetch_and_assert(
lrc_manager: LrcManager, method: FetcherMethodType, expect_fail: bool = False lrc_manager: LrcManager,
method: FetcherMethodType,
expect_fail: bool = False,
bypass_cache: bool = True,
) -> None: ) -> None:
result = lrc_manager.fetch_for_track(SAMPLE_SPOTIFY_TRACK, force_method=method) result = lrc_manager.fetch_for_track(
SAMPLE_SPOTIFY_TRACK, force_method=method, bypass_cache=bypass_cache
)
if expect_fail: if expect_fail:
assert result is None assert result is None
else: else:
@@ -48,7 +53,7 @@ def _fetch_and_assert(
def test_cache_search_fetcher_without_cache(lrc_manager: LrcManager): def test_cache_search_fetcher_without_cache(lrc_manager: LrcManager):
_fetch_and_assert(lrc_manager, "cache-search", expect_fail=True) _fetch_and_assert(lrc_manager, "cache-search", expect_fail=True, bypass_cache=False)
@pytest.mark.parametrize( @pytest.mark.parametrize(
@@ -68,7 +73,9 @@ def test_cache_search_fetcher_with_fuzzy_metadata(
expected_lrc = "[00:00.01]lyrics" expected_lrc = "[00:00.01]lyrics"
lrc_manager.manual_insert(SAMPLE_SPOTIFY_TRACK, expected_lrc) lrc_manager.manual_insert(SAMPLE_SPOTIFY_TRACK, expected_lrc)
result = lrc_manager.fetch_for_track(query_track, force_method="cache-search") result = lrc_manager.fetch_for_track(
query_track, force_method="cache-search", bypass_cache=False
)
assert result is not None assert result is not None
assert result.lyrics is not None assert result.lyrics is not None
@@ -84,7 +91,7 @@ def test_cache_search_fetcher_prefer_better_match(lrc_manager: LrcManager):
) )
result = lrc_manager.fetch_for_track( result = lrc_manager.fetch_for_track(
SAMPLE_SPOTIFY_TRACK, force_method="cache-search" SAMPLE_SPOTIFY_TRACK, force_method="cache-search", bypass_cache=False
) )
assert result is not None assert result is not None
+83 -19
View File
@@ -2,10 +2,11 @@ from __future__ import annotations
import asyncio import asyncio
from unittest.mock import patch from unittest.mock import patch
import pytest
from lrx_cli.config import HIGH_CONFIDENCE from lrx_cli.config import HIGH_CONFIDENCE
from lrx_cli.core import LrcManager from lrx_cli.core import LrcManager
from lrx_cli.fetchers.base import BaseFetcher from lrx_cli.fetchers.base import BaseFetcher, FetchResult
from lrx_cli.lrc import LRCData from lrx_cli.lrc import LRCData
from lrx_cli.models import CacheStatus, LyricResult, TrackMeta from lrx_cli.models import CacheStatus, LyricResult, TrackMeta
@@ -41,8 +42,15 @@ def _not_found() -> LyricResult:
return LyricResult(status=CacheStatus.NOT_FOUND) return LyricResult(status=CacheStatus.NOT_FOUND)
def _fr(
    synced: LyricResult | None = None,
    unsynced: LyricResult | None = None,
) -> FetchResult:
    """Build a ``FetchResult`` from optional synced/unsynced slots (both default to None)."""
    fetch_result = FetchResult(synced=synced, unsynced=unsynced)
    return fetch_result
class MockFetcher(BaseFetcher): class MockFetcher(BaseFetcher):
def __init__(self, name: str, result: LyricResult | None, delay: float = 0.0): def __init__(self, name: str, result: FetchResult, delay: float = 0.0):
self._name = name self._name = name
self._result = result self._result = result
self._delay = delay self._delay = delay
@@ -56,9 +64,7 @@ class MockFetcher(BaseFetcher):
def is_available(self, track: TrackMeta) -> bool: def is_available(self, track: TrackMeta) -> bool:
return True return True
async def fetch( async def fetch(self, track: TrackMeta, bypass_cache: bool = False) -> FetchResult:
self, track: TrackMeta, bypass_cache: bool = False
) -> LyricResult | None:
self.called = True self.called = True
try: try:
if self._delay: if self._delay:
@@ -78,8 +84,8 @@ def make_manager(tmp_path) -> LrcManager:
def test_unsynced_does_not_stop_next_group(tmp_path): def test_unsynced_does_not_stop_next_group(tmp_path):
"""Unsynced result should not stop the pipeline — next group must still run.""" """Unsynced result should not stop the pipeline — next group must still run."""
a = MockFetcher("a", _unsynced("a")) a = MockFetcher("a", _fr(unsynced=_unsynced("a")))
b = MockFetcher("b", _synced("b")) b = MockFetcher("b", _fr(synced=_synced("b")))
manager = make_manager(tmp_path) manager = make_manager(tmp_path)
with patch("lrx_cli.core.build_plan", return_value=[[a], [b]]): with patch("lrx_cli.core.build_plan", return_value=[[a], [b]]):
result = manager.fetch_for_track(_track()) result = manager.fetch_for_track(_track())
@@ -90,8 +96,8 @@ def test_unsynced_does_not_stop_next_group(tmp_path):
def test_trusted_synced_stops_next_group(tmp_path): def test_trusted_synced_stops_next_group(tmp_path):
"""Trusted synced from group1 must prevent group2 from running.""" """Trusted synced from group1 must prevent group2 from running."""
a = MockFetcher("a", _synced("a")) a = MockFetcher("a", _fr(synced=_synced("a")))
b = MockFetcher("b", _synced("b")) b = MockFetcher("b", _fr(synced=_synced("b")))
manager = make_manager(tmp_path) manager = make_manager(tmp_path)
with patch("lrx_cli.core.build_plan", return_value=[[a], [b]]): with patch("lrx_cli.core.build_plan", return_value=[[a], [b]]):
result = manager.fetch_for_track(_track()) result = manager.fetch_for_track(_track())
@@ -102,8 +108,8 @@ def test_trusted_synced_stops_next_group(tmp_path):
def test_negative_continues_next_group(tmp_path): def test_negative_continues_next_group(tmp_path):
"""NOT_FOUND from group1 must cause group2 to be tried.""" """NOT_FOUND from group1 must cause group2 to be tried."""
a = MockFetcher("a", _not_found()) a = MockFetcher("a", _fr(synced=_not_found(), unsynced=_not_found()))
b = MockFetcher("b", _synced("b")) b = MockFetcher("b", _fr(synced=_synced("b")))
manager = make_manager(tmp_path) manager = make_manager(tmp_path)
with patch("lrx_cli.core.build_plan", return_value=[[a], [b]]): with patch("lrx_cli.core.build_plan", return_value=[[a], [b]]):
result = manager.fetch_for_track(_track()) result = manager.fetch_for_track(_track())
@@ -119,8 +125,8 @@ def test_negative_continues_next_group(tmp_path):
def test_trusted_synced_cancels_sibling(tmp_path): def test_trusted_synced_cancels_sibling(tmp_path):
"""When a fast fetcher returns trusted synced, the slow sibling must be cancelled. """When a fast fetcher returns trusted synced, the slow sibling must be cancelled.
If cancellation is broken this test will block for 10 seconds.""" If cancellation is broken this test will block for 10 seconds."""
fast = MockFetcher("fast", _synced("fast")) fast = MockFetcher("fast", _fr(synced=_synced("fast")))
slow = MockFetcher("slow", _synced("slow"), delay=10.0) slow = MockFetcher("slow", _fr(synced=_synced("slow")), delay=10.0)
manager = make_manager(tmp_path) manager = make_manager(tmp_path)
with patch("lrx_cli.core.build_plan", return_value=[[fast, slow]]): with patch("lrx_cli.core.build_plan", return_value=[[fast, slow]]):
result = manager.fetch_for_track(_track()) result = manager.fetch_for_track(_track())
@@ -132,22 +138,56 @@ def test_trusted_synced_cancels_sibling(tmp_path):
def test_best_confidence_within_group(tmp_path): def test_best_confidence_within_group(tmp_path):
"""When no trusted synced result, highest-confidence result from group is returned.""" """When allow_unsynced=True and no trusted synced result, highest-confidence unsynced is returned."""
low = MockFetcher("low", _unsynced("low", confidence=40.0)) low = MockFetcher("low", _fr(unsynced=_unsynced("low", confidence=40.0)))
high = MockFetcher("high", _unsynced("high", confidence=70.0)) high = MockFetcher("high", _fr(unsynced=_unsynced("high", confidence=70.0)))
manager = make_manager(tmp_path) manager = make_manager(tmp_path)
with patch("lrx_cli.core.build_plan", return_value=[[low, high]]): with patch("lrx_cli.core.build_plan", return_value=[[low, high]]):
result = manager.fetch_for_track(_track()) result = manager.fetch_for_track(_track(), allow_unsynced=True)
assert result is not None assert result is not None
assert result.source == "high" assert result.source == "high"
def test_unsynced_only_returns_none_when_not_allowed(tmp_path):
    """With allow_unsynced=False, a plan that yields only unsynced lyrics returns None."""
    unsynced_payload = _fr(unsynced=_unsynced("u", confidence=95.0))
    only_unsynced = MockFetcher("u", unsynced_payload)
    manager = make_manager(tmp_path)
    plan = [[only_unsynced]]
    with patch("lrx_cli.core.build_plan", return_value=plan):
        outcome = manager.fetch_for_track(_track(), allow_unsynced=False)
    assert outcome is None
def test_allow_unsynced_flag_controls_return_type(tmp_path):
    """A fetcher offering both slots: allow_unsynced decides which one is returned.

    The synced slot has confidence 55 and the unsynced slot 95. The test
    expects a synced result when unsynced is disallowed and an unsynced
    result when it is allowed.
    """
    dual = MockFetcher(
        "dual",
        _fr(
            synced=_synced("dual", confidence=55.0),
            unsynced=_unsynced("dual", confidence=95.0),
        ),
    )
    manager = make_manager(tmp_path)

    # Same manager and fetcher for both phases; the disallowed phase runs first.
    phases = (
        (False, CacheStatus.SUCCESS_SYNCED),
        (True, CacheStatus.SUCCESS_UNSYNCED),
    )
    for flag, expected_status in phases:
        with patch("lrx_cli.core.build_plan", return_value=[[dual]]):
            outcome = manager.fetch_for_track(_track(), allow_unsynced=flag)
        assert outcome is not None
        assert outcome.status == expected_status
# Cache interaction # Cache interaction
def test_cache_negative_skips_fetch(tmp_path): def test_cache_negative_skips_fetch(tmp_path):
"""A cached NOT_FOUND entry must prevent the fetcher from being called.""" """A cached NOT_FOUND entry must prevent the fetcher from being called."""
fetcher = MockFetcher("src", _synced("src")) fetcher = MockFetcher("src", _fr(synced=_synced("src")))
manager = make_manager(tmp_path) manager = make_manager(tmp_path)
track = _track() track = _track()
manager.cache.set(track, "src", _not_found(), ttl_seconds=3600) manager.cache.set(track, "src", _not_found(), ttl_seconds=3600)
@@ -159,7 +199,7 @@ def test_cache_negative_skips_fetch(tmp_path):
def test_cache_trusted_synced_no_fetch(tmp_path): def test_cache_trusted_synced_no_fetch(tmp_path):
"""A trusted synced cache hit must be returned without calling the fetcher.""" """A trusted synced cache hit must be returned without calling the fetcher."""
fetcher = MockFetcher("src", None) fetcher = MockFetcher("src", _fr())
manager = make_manager(tmp_path) manager = make_manager(tmp_path)
track = _track() track = _track()
manager.cache.set(track, "src", _synced("src"), ttl_seconds=3600) manager.cache.set(track, "src", _synced("src"), ttl_seconds=3600)
@@ -168,3 +208,27 @@ def test_cache_trusted_synced_no_fetch(tmp_path):
assert not fetcher.called assert not fetcher.called
assert result is not None assert result is not None
assert result.status == CacheStatus.SUCCESS_SYNCED assert result.status == CacheStatus.SUCCESS_SYNCED
@pytest.mark.xfail(
    reason=(
        "Known limitation: cached unsynced currently blocks live fetch, "
        "so allow_unsynced=False may return None instead of fresh synced"
    ),
    strict=True,
)
def test_xfail_cached_unsynced_should_not_block_live_synced_when_unsynced_disallowed(
    tmp_path,
):
    """Desired behavior, currently unmet and therefore marked as a strict xfail.

    A cached unsynced entry for a source should not stop that source from
    being fetched live when the caller disallows unsynced lyrics.
    """
    live_source = MockFetcher("src", _fr(synced=_synced("src", confidence=90.0)))
    manager = make_manager(tmp_path)
    track = _track()

    # Seed the cache with an unsynced hit for the same source name.
    cached_entry = _unsynced("src", confidence=85.0)
    manager.cache.set(track, "src", cached_entry, ttl_seconds=3600)

    with patch("lrx_cli.core.build_plan", return_value=[[live_source]]):
        outcome = manager.fetch_for_track(track, allow_unsynced=False)

    assert live_source.called
    assert outcome is not None
    assert outcome.status == CacheStatus.SUCCESS_SYNCED
+7 -7
View File
@@ -75,13 +75,13 @@ def test_score_missing_one_side_gives_zero_for_field() -> None:
def test_score_synced_bonus() -> None: def test_score_synced_bonus() -> None:
"""Synced adds 10 points.""" """Synced state does not affect metadata score anymore."""
base = SearchCandidate(item="x", title="My Love", is_synced=False) base = SearchCandidate(item="x", title="My Love", is_synced=False)
synced = SearchCandidate(item="x", title="My Love", is_synced=True) synced = SearchCandidate(item="x", title="My Love", is_synced=True)
diff = _score_candidate(synced, "My Love", None, None, None) - _score_candidate( diff = _score_candidate(synced, "My Love", None, None, None) - _score_candidate(
base, "My Love", None, None, None base, "My Love", None, None, None
) )
assert diff == 10.0 assert diff == 0.0
def test_score_duration_linear_decay() -> None: def test_score_duration_linear_decay() -> None:
@@ -95,11 +95,11 @@ def test_score_duration_linear_decay() -> None:
at_tol = SearchCandidate(item="x", duration_ms=232000.0 + 3000.0) at_tol = SearchCandidate(item="x", duration_ms=232000.0 + 3000.0)
score_edge = _score_candidate(at_tol, None, None, None, 232000) score_edge = _score_candidate(at_tol, None, None, None, 232000)
# Only duration is comparable → rescaled to fill 0-90 # Only duration is comparable → metadata spans 0-90, plus a constant baseline +10
# exact=90, half=45, edge=0 # exact=100, half=55, edge=10
assert score_exact == 90.0 assert score_exact == 100.0
assert score_half == 45.0 assert score_half == 55.0
assert score_edge == 0.0 assert score_edge == 10.0
def test_duration_hard_filter_rejects_all_mismatched() -> None: def test_duration_hard_filter_rejects_all_mismatched() -> None:
Generated
+1 -1
View File
@@ -153,7 +153,7 @@ wheels = [
[[package]] [[package]]
name = "lrx-cli" name = "lrx-cli"
version = "0.5.6" version = "0.6.0"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
{ name = "cyclopts" }, { name = "cyclopts" },