refactor: large scale refactor regarding selection & fetchers

This commit is contained in:
2026-04-06 13:37:51 +02:00
parent 69b7f5c60c
commit 0c85af534e
23 changed files with 794 additions and 364 deletions
+29 -4
View File
@@ -6,8 +6,35 @@ Description: Base fetcher class and common interfaces.
from abc import ABC, abstractmethod
from typing import Optional
from dataclasses import dataclass
from ..models import TrackMeta, LyricResult
from ..models import CacheStatus, TrackMeta, LyricResult
@dataclass(frozen=True, slots=True)
class FetchResult:
    """Outcome of one fetcher run, split into a synced and an unsynced slot.

    Each slot holds its own ``LyricResult``. A slot left as ``None`` carries
    no result for that kind of lyrics; fetchers return a bare
    ``FetchResult()`` when they skip a track entirely.
    """

    # Result for time-synced lyrics, or None when nothing is reported.
    synced: Optional[LyricResult] = None
    # Result for plain (unsynced) lyrics, or None when nothing is reported.
    unsynced: Optional[LyricResult] = None

    @staticmethod
    def _uniform(status: CacheStatus) -> "FetchResult":
        """Build a result whose two slots both carry ``status`` and no lyrics."""

        def blank() -> LyricResult:
            # NOTE(review): no ttl is passed here, whereas fetchers that build
            # NOT_FOUND results by hand pass ttl=TTL_NOT_FOUND — confirm the
            # LyricResult default is what the cache layer expects.
            return LyricResult(status=status, lyrics=None, source=None)

        # One distinct LyricResult object per slot.
        return FetchResult(synced=blank(), unsynced=blank())

    @staticmethod
    def from_not_found() -> "FetchResult":
        """Return a result marking both slots as ``CacheStatus.NOT_FOUND``."""
        return FetchResult._uniform(CacheStatus.NOT_FOUND)

    @staticmethod
    def from_network_error() -> "FetchResult":
        """Return a result marking both slots as ``CacheStatus.NETWORK_ERROR``."""
        return FetchResult._uniform(CacheStatus.NETWORK_ERROR)
class BaseFetcher(ABC):
@@ -28,8 +55,6 @@ class BaseFetcher(ABC):
pass
@abstractmethod
async def fetch(self, track: TrackMeta, bypass_cache: bool = False) -> FetchResult:
    """Fetch lyrics for the given track.

    Args:
        track: Metadata of the track to look up.
        bypass_cache: Caller's request to skip cached data; the cache-search
            fetcher, for example, returns an empty result when this is set.

    Returns:
        A ``FetchResult`` with separate ``synced`` and ``unsynced`` slots.
        Implementations never return ``None``: when a fetch cannot be
        attempted they return an empty ``FetchResult()`` (both slots ``None``).
    """
    pass
+65 -43
View File
@@ -12,7 +12,7 @@ from typing import Optional
from loguru import logger
from .base import BaseFetcher
from .base import BaseFetcher, FetchResult
from .selection import SearchCandidate, select_best
from ..models import TrackMeta, LyricResult, CacheStatus
from ..cache import CacheEngine
@@ -34,65 +34,87 @@ class CacheSearchFetcher(BaseFetcher):
def is_available(self, track: TrackMeta) -> bool:
    """Return True when the track has a non-empty title to search the cache by."""
    has_title = bool(track.title)
    return has_title
def _get_exact(self, track: TrackMeta, synced: bool) -> Optional[LyricResult]:
    """Return the exact-metadata cache hit of the requested kind, if any.

    Args:
        track: Track to look up in the cache.
        synced: True to request a SUCCESS_SYNCED entry, False for
            SUCCESS_UNSYNCED.

    Returns:
        The cached ``LyricResult`` when one exists and carries lyrics,
        otherwise ``None``.
    """
    wanted = CacheStatus.SUCCESS_SYNCED if synced else CacheStatus.SUCCESS_UNSYNCED
    exact = self._cache.find_best_positive(track, wanted)
    if exact and exact.lyrics is not None:
        logger.info(
            f"Cache-search: exact {'synced' if synced else 'unsynced'} hit ({exact.status.value})"
        )
        return exact
    return None


def _get_fuzzy(
    self, matches: list, track: TrackMeta, synced: bool
) -> Optional[LyricResult]:
    """Pick the best fuzzy cache match of the requested kind.

    Args:
        matches: Rows returned by ``search_by_meta``; each is read through
            ``.get`` for ``lyrics``, ``status``, ``length``, ``title``,
            ``artist``, ``album`` and ``source``.
        track: Track whose metadata the candidates are scored against.
        synced: True to consider only SUCCESS_SYNCED rows, False to consider
            only SUCCESS_UNSYNCED rows.

    Returns:
        A ``LyricResult`` built from the best-scoring row (with its
        confidence), or ``None`` when no suitable row exists.
    """
    wanted = CacheStatus.SUCCESS_SYNCED if synced else CacheStatus.SUCCESS_UNSYNCED
    filtered = [
        SearchCandidate(
            item=m,
            duration_ms=float(m["length"]) if m.get("length") else None,
            is_synced=synced,
            title=m.get("title"),
            artist=m.get("artist"),
            album=m.get("album"),
        )
        for m in matches
        # Both conditions must hold. Written as `A and B or C`, the original
        # filter let unsynced rows through even when they had no lyrics,
        # because `and` binds tighter than `or`.
        if m.get("lyrics") and m.get("status") == wanted.value
    ]

    best, confidence = select_best(
        filtered,
        track.length,
        title=track.title,
        artist=track.artist,
        album=track.album,
    )
    if best and best.get("lyrics") is not None:
        logger.info(
            f"Cache-search: fuzzy {'synced' if synced else 'unsynced'} hit from "
            f"[{best.get('source')}] album={best.get('album')!r} (confidence={confidence:.0f})"
        )
        return LyricResult(
            status=wanted,
            lyrics=LRCData(best["lyrics"]),
            source=self.source_name,
            confidence=confidence,
        )
    return None


async def fetch(self, track: TrackMeta, bypass_cache: bool = False) -> FetchResult:
    """Look up synced and unsynced lyrics for ``track`` in the local cache.

    An exact metadata match is tried first for each kind; any kind still
    missing afterwards is filled from a fuzzy cross-album search.

    Args:
        track: Track to look up; it must have a title.
        bypass_cache: When True the cache is not consulted at all.

    Returns:
        A ``FetchResult`` whose slots are the hits found, or ``None`` for a
        kind with no hit. An empty ``FetchResult()`` is returned when the
        lookup is bypassed or the track has no title.
    """
    if bypass_cache:
        logger.debug("Cache-search: bypassed by caller")
        return FetchResult()

    if not track.title:
        logger.debug("Cache-search: skipped — no title")
        return FetchResult()

    # Fast path: exact metadata match, one lookup per lyric kind.
    res_synced = self._get_exact(track, synced=True)
    res_unsynced = self._get_exact(track, synced=False)
    if res_synced and res_unsynced:
        return FetchResult(synced=res_synced, unsynced=res_unsynced)

    # Slow path: fuzzy cross-album search for whichever kind is still missing.
    matches = self._cache.search_by_meta(title=track.title, length=track.length)
    if not matches:
        logger.debug(f"Cache-search: no match for {track.display_name()}")
        return FetchResult(synced=res_synced, unsynced=res_unsynced)

    if not res_synced:
        res_synced = self._get_fuzzy(matches, track, synced=True)
    if not res_unsynced:
        res_unsynced = self._get_fuzzy(matches, track, synced=False)

    return FetchResult(synced=res_synced, unsynced=res_unsynced)
+60 -45
View File
@@ -12,8 +12,8 @@ from loguru import logger
from mutagen._file import File
from mutagen.flac import FLAC
from .base import BaseFetcher
from ..models import TrackMeta, LyricResult
from .base import BaseFetcher, FetchResult
from ..models import CacheStatus, TrackMeta, LyricResult
from ..lrc import get_audio_path, get_sidecar_path, LRCData
@@ -25,17 +25,18 @@ class LocalFetcher(BaseFetcher):
def is_available(self, track: TrackMeta) -> bool:
    """Return the track's ``is_local`` flag; only local tracks are handled here."""
    local_track = track.is_local
    return local_track
async def fetch(
self, track: TrackMeta, bypass_cache: bool = False
) -> Optional[LyricResult]:
async def fetch(self, track: TrackMeta, bypass_cache: bool = False) -> FetchResult:
"""Attempt to read lyrics from local filesystem."""
if not track.is_local or not track.url:
return None
return FetchResult()
audio_path = get_audio_path(track.url, ensure_exists=False)
if not audio_path:
logger.debug(f"Local: audio URL is not a valid file path: {track.url}")
return None
return FetchResult()
synced_result: Optional[LyricResult] = None
unsynced_result: Optional[LyricResult] = None
lrc_path = get_sidecar_path(
track.url, ensure_audio_exists=False, ensure_exists=True
@@ -50,11 +51,19 @@ class LocalFetcher(BaseFetcher):
logger.info(
f"Local: found .lrc sidecar ({status.value}) for {audio_path.name}"
)
return LyricResult(
status=status,
lyrics=lrc,
source=self.source_name,
)
if status == CacheStatus.SUCCESS_SYNCED:
synced_result = LyricResult(
status=status,
lyrics=lrc,
source=f"{self.source_name} (sidecar)",
)
else:
unsynced_result = LyricResult(
status=status,
lyrics=lrc,
source=f"{self.source_name} (sidecar)",
)
except Exception as e:
logger.error(f"Local: error reading {lrc_path}: {e}")
else:
@@ -63,39 +72,45 @@ class LocalFetcher(BaseFetcher):
# Embedded metadata
if not audio_path.exists():
logger.debug(f"Local: audio file does not exist: {audio_path}")
return None
try:
audio = File(audio_path)
if audio is not None:
lyrics = None
else:
try:
audio = File(audio_path)
if audio is not None:
lyrics = None
if isinstance(audio, FLAC):
# FLAC stores lyrics in vorbis comment tags
lyrics = (
audio.get("lyrics") or audio.get("unsynclyrics") or [None]
)[0]
elif hasattr(audio, "tags") and audio.tags:
# MP3 / other: look for USLT or SYLT ID3 frames
for key in audio.tags.keys():
if key.startswith("USLT") or key.startswith("SYLT"):
lyrics = str(audio.tags[key])
break
if isinstance(audio, FLAC):
# FLAC stores lyrics in vorbis comment tags
lyrics = (
audio.get("lyrics") or audio.get("unsynclyrics") or [None]
)[0]
elif hasattr(audio, "tags") and audio.tags:
# MP3 / other: look for USLT or SYLT ID3 frames
for key in audio.tags.keys():
if key.startswith("USLT") or key.startswith("SYLT"):
lyrics = str(audio.tags[key])
break
if lyrics:
lrc = LRCData(lyrics)
status = lrc.detect_sync_status()
logger.info(
f"Local: found embedded lyrics ({status.value}) for {audio_path.name}"
)
return LyricResult(
status=status,
lyrics=lrc,
source=f"{self.source_name} (embedded)",
)
else:
logger.debug("Local: no embedded lyrics found")
except Exception as e:
logger.error(f"Local: error reading metadata for {audio_path}: {e}")
if lyrics:
lrc = LRCData(lyrics)
status = lrc.detect_sync_status()
logger.info(
f"Local: found embedded lyrics ({status.value}) for {audio_path.name}"
)
if status == CacheStatus.SUCCESS_SYNCED and not synced_result:
synced_result = LyricResult(
status=status,
lyrics=lrc,
source=f"{self.source_name} (embedded)",
)
elif not unsynced_result:
unsynced_result = LyricResult(
status=status,
lyrics=lrc,
source=f"{self.source_name} (embedded)",
)
else:
logger.debug("Local: no embedded lyrics found")
except Exception as e:
logger.error(f"Local: error reading metadata for {audio_path}: {e}")
logger.debug(f"Local: no lyrics found for {audio_path}")
return None
return FetchResult(synced=synced_result, unsynced=unsynced_result)
+21 -22
View File
@@ -5,19 +5,17 @@ Description: LRCLIB fetcher — queries lrclib.net for synced/plain lyrics.
Requires complete track metadata (artist, title, album, duration).
"""
from typing import Optional
import httpx
from loguru import logger
from urllib.parse import urlencode
from .base import BaseFetcher
from .base import BaseFetcher, FetchResult
from ..models import TrackMeta, LyricResult, CacheStatus
from ..lrc import LRCData
from ..config import (
HTTP_TIMEOUT,
TTL_UNSYNCED,
TTL_NOT_FOUND,
TTL_NETWORK_ERROR,
UA_LRX,
)
@@ -32,13 +30,11 @@ class LrclibFetcher(BaseFetcher):
def is_available(self, track: TrackMeta) -> bool:
    """Return the track's ``is_complete`` flag; LRCLIB needs complete metadata."""
    complete = track.is_complete
    return complete
async def fetch(
self, track: TrackMeta, bypass_cache: bool = False
) -> Optional[LyricResult]:
async def fetch(self, track: TrackMeta, bypass_cache: bool = False) -> FetchResult:
"""Fetch lyrics from LRCLIB. Requires complete metadata."""
if not track.is_complete:
logger.debug("LRCLIB: skipped — incomplete metadata")
return None
return FetchResult()
params = {
"track_name": track.title,
@@ -55,48 +51,51 @@ class LrclibFetcher(BaseFetcher):
if resp.status_code == 404:
logger.debug(f"LRCLIB: not found for {track.display_name()}")
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
return FetchResult.from_not_found()
if resp.status_code != 200:
logger.error(f"LRCLIB: API returned {resp.status_code}")
return LyricResult(
status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR
)
return FetchResult.from_network_error()
data = resp.json()
if not isinstance(data, dict):
logger.error(f"LRCLIB: unexpected response type: {type(data).__name__}")
return LyricResult(
status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR
)
return FetchResult.from_network_error()
synced = data.get("syncedLyrics")
unsynced = data.get("plainLyrics")
res_synced: LyricResult = LyricResult(
status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND
)
res_unsynced: LyricResult = LyricResult(
status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND
)
if isinstance(synced, str) and synced.strip():
lyrics = LRCData(synced)
logger.info(f"LRCLIB: got synced lyrics ({len(lyrics)} lines)")
return LyricResult(
res_synced = LyricResult(
status=CacheStatus.SUCCESS_SYNCED,
lyrics=lyrics,
source=self.source_name,
)
elif isinstance(unsynced, str) and unsynced.strip():
if isinstance(unsynced, str) and unsynced.strip():
lyrics = LRCData(unsynced)
logger.info(f"LRCLIB: got unsynced lyrics ({len(lyrics)} lines)")
return LyricResult(
res_unsynced = LyricResult(
status=CacheStatus.SUCCESS_UNSYNCED,
lyrics=lyrics,
source=self.source_name,
ttl=TTL_UNSYNCED,
)
else:
logger.debug(f"LRCLIB: empty response for {track.display_name()}")
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
return FetchResult(synced=res_synced, unsynced=res_unsynced)
except httpx.HTTPError as e:
logger.error(f"LRCLIB: HTTP error: {e}")
return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)
return FetchResult.from_network_error()
except Exception as e:
logger.error(f"LRCLIB: unexpected error: {e}")
return None
return FetchResult()
+21 -20
View File
@@ -7,11 +7,10 @@ Description: LRCLIB search fetcher — fuzzy search via lrclib.net /api/search.
import asyncio
import httpx
from typing import Optional
from loguru import logger
from urllib.parse import urlencode
from .base import BaseFetcher
from .base import BaseFetcher, FetchResult
from .selection import SearchCandidate, select_best
from ..models import TrackMeta, LyricResult, CacheStatus
from ..lrc import LRCData
@@ -19,7 +18,6 @@ from ..config import (
HTTP_TIMEOUT,
TTL_UNSYNCED,
TTL_NOT_FOUND,
TTL_NETWORK_ERROR,
UA_LRX,
)
@@ -62,12 +60,10 @@ class LrclibSearchFetcher(BaseFetcher):
return queries
async def fetch(
self, track: TrackMeta, bypass_cache: bool = False
) -> Optional[LyricResult]:
async def fetch(self, track: TrackMeta, bypass_cache: bool = False) -> FetchResult:
if not track.title:
logger.debug("LRCLIB-search: skipped — no title")
return None
return FetchResult()
queries = self._build_queries(track)
logger.info(f"LRCLIB-search: searching for {track.display_name()}")
@@ -110,11 +106,9 @@ class LrclibSearchFetcher(BaseFetcher):
if not candidates:
if had_error:
return LyricResult(
status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR
)
return FetchResult.from_network_error()
logger.debug(f"LRCLIB-search: no results for {track.display_name()}")
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
return FetchResult.from_not_found()
logger.debug(
f"LRCLIB-search: got {len(candidates)} unique candidates "
@@ -144,41 +138,48 @@ class LrclibSearchFetcher(BaseFetcher):
)
if best is None:
logger.debug("LRCLIB-search: no valid candidate found")
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
return FetchResult.from_not_found()
synced = best.get("syncedLyrics")
unsynced = best.get("plainLyrics")
res_synced: LyricResult = LyricResult(
status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND
)
res_unsynced: LyricResult = LyricResult(
status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND
)
if isinstance(synced, str) and synced.strip():
lyrics = LRCData(synced)
logger.info(
f"LRCLIB-search: got synced lyrics ({len(lyrics)} lines, confidence={confidence:.0f})"
)
return LyricResult(
res_synced = LyricResult(
status=CacheStatus.SUCCESS_SYNCED,
lyrics=lyrics,
source=self.source_name,
confidence=confidence,
)
elif isinstance(unsynced, str) and unsynced.strip():
if isinstance(unsynced, str) and unsynced.strip():
lyrics = LRCData(unsynced)
logger.info(
f"LRCLIB-search: got unsynced lyrics ({len(lyrics)} lines, confidence={confidence:.0f})"
)
return LyricResult(
res_unsynced = LyricResult(
status=CacheStatus.SUCCESS_UNSYNCED,
lyrics=lyrics,
source=self.source_name,
ttl=TTL_UNSYNCED,
confidence=confidence,
)
else:
logger.debug("LRCLIB-search: best candidate has empty lyrics")
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
return FetchResult(synced=res_synced, unsynced=res_unsynced)
except httpx.HTTPError as e:
logger.error(f"LRCLIB-search: HTTP error: {e}")
return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)
return FetchResult.from_network_error()
except Exception as e:
logger.error(f"LRCLIB-search: unexpected error: {e}")
return None
return FetchResult()
+28 -24
View File
@@ -15,12 +15,11 @@ import json
from typing import Optional
from loguru import logger
from .base import BaseFetcher
from .base import BaseFetcher, FetchResult
from .selection import SearchCandidate, select_best
from ..authenticators.musixmatch import MusixmatchAuthenticator
from ..lrc import LRCData
from ..models import CacheStatus, LyricResult, TrackMeta
from ..config import TTL_NETWORK_ERROR, TTL_NOT_FOUND
_MUSIXMATCH_MACRO_URL = "https://apic-desktop.musixmatch.com/ws/1.1/macro.subtitles.get"
_MUSIXMATCH_SEARCH_URL = "https://apic-desktop.musixmatch.com/ws/1.1/track.search"
@@ -156,9 +155,7 @@ class MusixmatchSpotifyFetcher(BaseFetcher):
def is_available(self, track: TrackMeta) -> bool:
    """Return True when the track has a trackid and the authenticator is not in cooldown."""
    # Without a trackid there is nothing to query, so the authenticator is
    # not consulted at all.
    if not track.trackid:
        return False
    return not self.auth.is_cooldown()
async def fetch(
self, track: TrackMeta, bypass_cache: bool = False
) -> Optional[LyricResult]:
async def fetch(self, track: TrackMeta, bypass_cache: bool = False) -> FetchResult:
logger.info(f"Musixmatch-Spotify: fetching lyrics for {track.display_name()}")
try:
@@ -167,22 +164,27 @@ class MusixmatchSpotifyFetcher(BaseFetcher):
{"track_spotify_id": track.trackid}, # type: ignore[dict-item]
)
except AttributeError:
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
return FetchResult.from_not_found()
except Exception as e:
logger.error(f"Musixmatch-Spotify: fetch failed: {e}")
return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)
return FetchResult.from_network_error()
if lrc is None:
logger.debug(
f"Musixmatch-Spotify: no lyrics found for {track.display_name()}"
)
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
return FetchResult.from_not_found()
logger.info(f"Musixmatch-Spotify: got SUCCESS_SYNCED lyrics ({len(lrc)} lines)")
return LyricResult(
status=CacheStatus.SUCCESS_SYNCED,
lyrics=lrc,
source=self.source_name,
return FetchResult(
synced=LyricResult(
status=CacheStatus.SUCCESS_SYNCED,
lyrics=lrc,
source=self.source_name,
),
# Fetching unsynced lyrics is not possible with current endpoint,
# so no need to cache NOT_FOUND to avoid repeated failed attempts
unsynced=None,
)
@@ -258,38 +260,40 @@ class MusixmatchFetcher(BaseFetcher):
logger.debug("Musixmatch: no suitable candidate found")
return best_id, confidence
async def fetch(self, track: TrackMeta, bypass_cache: bool = False) -> FetchResult:
    """Search Musixmatch for ``track`` and fetch its synced lyrics.

    Args:
        track: Track to search for.
        bypass_cache: Accepted for interface compatibility; not read here.

    Returns:
        ``FetchResult.from_not_found()`` when no candidate or no lyrics are
        found, ``FetchResult.from_network_error()`` when the request fails,
        otherwise a result with only the ``synced`` slot populated.
    """
    logger.info(f"Musixmatch: fetching lyrics for {track.display_name()}")

    try:
        commontrack_id, confidence = await self._search(track)
        if commontrack_id is None:
            logger.debug(f"Musixmatch: no match found for {track.display_name()}")
            return FetchResult.from_not_found()

        lrc = await _fetch_macro(
            self.auth,
            {"commontrack_id": str(commontrack_id)},
        )
    except AttributeError as e:
        # Still reported as "not found", but logged so a genuine programming
        # error hidden behind this broad mapping stays visible.
        logger.debug(
            f"Musixmatch: AttributeError while fetching, treating as not found: {e}"
        )
        return FetchResult.from_not_found()
    except Exception as e:
        logger.error(f"Musixmatch: fetch failed: {e}")
        return FetchResult.from_network_error()

    if lrc is None:
        logger.debug(f"Musixmatch: no lyrics for commontrack_id={commontrack_id}")
        return FetchResult.from_not_found()

    logger.info(
        f"Musixmatch: got SUCCESS_SYNCED lyrics "
        f"for commontrack_id={commontrack_id} ({len(lrc)} lines)"
    )
    return FetchResult(
        synced=LyricResult(
            status=CacheStatus.SUCCESS_SYNCED,
            lyrics=lrc,
            source=self.source_name,
            confidence=confidence,
        ),
        # Fetching unsynced lyrics is not possible with the current endpoint,
        # so the slot is left empty instead of recording NOT_FOUND for it.
        unsynced=None,
    )
+59 -25
View File
@@ -8,18 +8,16 @@ Description: Netease Cloud Music fetcher.
"""
import asyncio
from typing import Optional
import httpx
from loguru import logger
from .base import BaseFetcher
from .base import BaseFetcher, FetchResult
from .selection import SearchCandidate, select_ranked
from ..models import TrackMeta, LyricResult, CacheStatus
from ..lrc import LRCData
from ..config import (
HTTP_TIMEOUT,
TTL_NOT_FOUND,
TTL_NETWORK_ERROR,
MULTI_CANDIDATE_DELAY_S,
UA_BROWSER,
)
@@ -112,9 +110,7 @@ class NeteaseFetcher(BaseFetcher):
logger.error(f"Netease: search failed: {e}")
return []
async def _get_lyric(
self, song_id: int, confidence: float = 0.0
) -> Optional[LyricResult]:
async def _get_lyric(self, song_id: int, confidence: float = 0.0) -> FetchResult:
logger.debug(f"Netease: fetching lyrics for song_id={song_id}")
try:
@@ -141,21 +137,19 @@ class NeteaseFetcher(BaseFetcher):
logger.error(
f"Netease: lyric response is not dict: {type(data).__name__}"
)
return LyricResult(
status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR
)
return FetchResult.from_network_error()
lrc_obj = data.get("lrc")
if not isinstance(lrc_obj, dict):
logger.debug(
f"Netease: no 'lrc' object in response for song_id={song_id}"
)
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
return FetchResult.from_not_found()
lrc: str = lrc_obj.get("lyric", "")
if not isinstance(lrc, str) or not lrc.strip():
logger.debug(f"Netease: empty lyrics for song_id={song_id}")
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
return FetchResult.from_not_found()
lrcdata = LRCData(lrc)
status = lrcdata.detect_sync_status()
@@ -163,38 +157,78 @@ class NeteaseFetcher(BaseFetcher):
f"Netease: got {status.value} lyrics for song_id={song_id} "
f"({len(lrcdata)} lines)"
)
return LyricResult(
status=status,
lyrics=lrcdata,
source=self.source_name,
confidence=confidence,
not_found = LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
if status == CacheStatus.SUCCESS_SYNCED:
return FetchResult(
synced=LyricResult(
status=CacheStatus.SUCCESS_SYNCED,
lyrics=lrcdata,
source=self.source_name,
confidence=confidence,
),
unsynced=not_found,
)
return FetchResult(
synced=not_found,
unsynced=LyricResult(
status=CacheStatus.SUCCESS_UNSYNCED,
lyrics=lrcdata,
source=self.source_name,
confidence=confidence,
),
)
except Exception as e:
logger.error(f"Netease: lyric fetch failed for song_id={song_id}: {e}")
return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)
return FetchResult.from_network_error()
async def fetch(self, track: TrackMeta, bypass_cache: bool = False) -> FetchResult:
    """Search Netease for ``track`` and collect lyrics from the ranked candidates.

    Candidates are tried in order, with a delay between requests, until
    synced lyrics are found or the list is exhausted. The first unsynced
    lyrics seen along the way are kept as well.

    Args:
        track: Track to search for; artist and/or title must be present.
        bypass_cache: Accepted for interface compatibility; not read here.

    Returns:
        An empty ``FetchResult()`` when there is no usable metadata,
        ``FetchResult.from_not_found()`` when the search has no candidates,
        otherwise a result whose slots hold what was found (NOT_FOUND for a
        kind that was not found). If a candidate request fails, the failing
        result is returned, except that unsynced lyrics already found are
        kept in the ``unsynced`` slot.
    """
    query = f"{track.artist or ''} {track.title or ''}".strip()
    if not query:
        logger.debug("Netease: skipped — insufficient metadata")
        return FetchResult()

    logger.info(f"Netease: fetching lyrics for {track.display_name()}")

    candidates = await self._search(track)
    if not candidates:
        logger.debug(f"Netease: no match found for {track.display_name()}")
        return FetchResult.from_not_found()

    res_synced: LyricResult = LyricResult(
        status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND
    )
    res_unsynced: LyricResult = LyricResult(
        status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND
    )

    for i, (song_id, confidence) in enumerate(candidates):
        if i > 0:
            await asyncio.sleep(MULTI_CANDIDATE_DELAY_S)

        result = await self._get_lyric(song_id, confidence=confidence)

        error_slot = next(
            (
                slot
                for slot in (result.synced, result.unsynced)
                if slot is not None and slot.status == CacheStatus.NETWORK_ERROR
            ),
            None,
        )
        if error_slot is not None:
            if res_unsynced.status == CacheStatus.SUCCESS_UNSYNCED:
                # An earlier candidate already produced unsynced lyrics; do
                # not throw them away because a later request failed. Only
                # the still-missing synced slot reports the error.
                return FetchResult(synced=error_slot, unsynced=res_unsynced)
            return result

        if (
            res_synced.status == CacheStatus.NOT_FOUND
            and result.synced
            and result.synced.status == CacheStatus.SUCCESS_SYNCED
        ):
            res_synced = result.synced
        if (
            res_unsynced.status == CacheStatus.NOT_FOUND
            and result.unsynced
            and result.unsynced.status == CacheStatus.SUCCESS_UNSYNCED
        ):
            res_unsynced = result.unsynced

        # Netease API is quite expensive, so we stop after finding synced lyrics,
        # instead of trying to find both synced and unsynced versions
        if res_synced.status == CacheStatus.SUCCESS_SYNCED:
            break

    return FetchResult(synced=res_synced, unsynced=res_unsynced)
+59 -25
View File
@@ -10,18 +10,16 @@ Description: QQ Music fetcher via self-hosted API proxy.
"""
import asyncio
from typing import Optional
import httpx
from loguru import logger
from .base import BaseFetcher
from .base import BaseFetcher, FetchResult
from .selection import SearchCandidate, select_ranked
from ..models import TrackMeta, LyricResult, CacheStatus
from ..lrc import LRCData
from ..config import (
HTTP_TIMEOUT,
TTL_NOT_FOUND,
TTL_NETWORK_ERROR,
MULTI_CANDIDATE_DELAY_S,
)
@@ -104,9 +102,7 @@ class QQMusicFetcher(BaseFetcher):
logger.error(f"QQMusic: search failed: {e}")
return []
async def _get_lyric(
self, mid: str, confidence: float = 0.0
) -> Optional[LyricResult]:
async def _get_lyric(self, mid: str, confidence: float = 0.0) -> FetchResult:
logger.debug(f"QQMusic: fetching lyrics for mid={mid}")
try:
@@ -120,56 +116,94 @@ class QQMusicFetcher(BaseFetcher):
if data.get("code") != 0:
logger.error(f"QQMusic: lyric API error: {data}")
return LyricResult(
status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR
)
return FetchResult.from_network_error()
lrc = data.get("data", {}).get("lyric", "")
if not isinstance(lrc, str) or not lrc.strip():
logger.debug(f"QQMusic: empty lyrics for mid={mid}")
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
return FetchResult.from_not_found()
lrcdata = LRCData(lrc)
status = lrcdata.detect_sync_status()
logger.info(
f"QQMusic: got {status.value} lyrics for mid={mid} ({len(lrcdata)} lines)"
)
return LyricResult(
status=status,
lyrics=lrcdata,
source=self.source_name,
confidence=confidence,
not_found = LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
if status == CacheStatus.SUCCESS_SYNCED:
return FetchResult(
synced=LyricResult(
status=CacheStatus.SUCCESS_SYNCED,
lyrics=lrcdata,
source=self.source_name,
confidence=confidence,
),
unsynced=not_found,
)
return FetchResult(
synced=not_found,
unsynced=LyricResult(
status=CacheStatus.SUCCESS_UNSYNCED,
lyrics=lrcdata,
source=self.source_name,
confidence=confidence,
),
)
except Exception as e:
logger.error(f"QQMusic: lyric fetch failed for mid={mid}: {e}")
return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)
return FetchResult.from_network_error()
async def fetch(self, track: TrackMeta, bypass_cache: bool = False) -> FetchResult:
    """Search QQ Music for ``track`` and collect lyrics from the ranked candidates.

    Candidates are tried in order, with a delay between requests, until
    synced lyrics are found or the list is exhausted. The first unsynced
    lyrics seen along the way are kept as well.

    Args:
        track: Track to search for; artist and/or title must be present.
        bypass_cache: Accepted for interface compatibility; not read here.

    Returns:
        An empty ``FetchResult()`` when auth is not configured or there is no
        usable metadata, ``FetchResult.from_not_found()`` when the search has
        no candidates, otherwise a result whose slots hold what was found
        (NOT_FOUND for a kind that was not found). If a candidate request
        fails, the failing result is returned, except that unsynced lyrics
        already found are kept in the ``unsynced`` slot.
    """
    if not self.auth.is_configured():
        logger.debug("QQMusic: skipped — Auth not configured")
        return FetchResult()

    query = f"{track.artist or ''} {track.title or ''}".strip()
    if not query:
        logger.debug("QQMusic: skipped — insufficient metadata")
        return FetchResult()

    logger.info(f"QQMusic: fetching lyrics for {track.display_name()}")

    candidates = await self._search(track)
    if not candidates:
        logger.debug(f"QQMusic: no match found for {track.display_name()}")
        return FetchResult.from_not_found()

    res_synced: LyricResult = LyricResult(
        status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND
    )
    res_unsynced: LyricResult = LyricResult(
        status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND
    )

    for i, (mid, confidence) in enumerate(candidates):
        if i > 0:
            await asyncio.sleep(MULTI_CANDIDATE_DELAY_S)

        result = await self._get_lyric(mid, confidence=confidence)

        error_slot = next(
            (
                slot
                for slot in (result.synced, result.unsynced)
                if slot is not None and slot.status == CacheStatus.NETWORK_ERROR
            ),
            None,
        )
        if error_slot is not None:
            if res_unsynced.status == CacheStatus.SUCCESS_UNSYNCED:
                # An earlier candidate already produced unsynced lyrics; do
                # not throw them away because a later request failed. Only
                # the still-missing synced slot reports the error.
                return FetchResult(synced=error_slot, unsynced=res_unsynced)
            return result

        if (
            res_synced.status == CacheStatus.NOT_FOUND
            and result.synced
            and result.synced.status == CacheStatus.SUCCESS_SYNCED
        ):
            res_synced = result.synced
        if (
            res_unsynced.status == CacheStatus.NOT_FOUND
            and result.unsynced
            and result.unsynced.status == CacheStatus.SUCCESS_UNSYNCED
        ):
            res_unsynced = result.unsynced

        # QQMusic API is quite expensive, so we stop after finding synced lyrics,
        # instead of trying to find both synced and unsynced versions
        if res_synced.status == CacheStatus.SUCCESS_SYNCED:
            break

    return FetchResult(synced=res_synced, unsynced=res_unsynced)
+10 -9
View File
@@ -70,14 +70,12 @@ def _score_candidate(
Scoring works in two tiers:
1. **Metadata score** — computed from fields available on *both* sides,
then rescaled to fill the 0-90 range so that missing fields don't
inflate the score. Fields missing on both sides are simply excluded
from the calculation (neutral). Fields present on only one side
contribute 0 to the numerator but their weight still counts in the
denominator (penalty for asymmetric absence).
2. **Synced bonus** — a flat 10 pts, always applied independently.
Metadata score — computed from fields available on both sides,
then rescaled to fill the 0-90 range so that missing fields don't
inflate the score. Fields missing on both sides are simply excluded
from the calculation (neutral). Fields present on only one side
contribute 0 to the numerator but their weight still counts in the
denominator (penalty for asymmetric absence).
Field weights (before rescaling):
- Title: 40
@@ -141,7 +139,10 @@ def _score_candidate(
metadata_score = 0.0
# Synced bonus (always 10 pts, independent of metadata)
synced_score = _W_SYNCED if c.is_synced else 0.0
# synced_score = _W_SYNCED if c.is_synced else 0.0
# EDIT: synced or not should not affect the score that indicates metadata similarity.
# Always apply synced bonus regardless of is_synced.
synced_score = _W_SYNCED
return metadata_score + synced_score
+28 -18
View File
@@ -5,14 +5,13 @@ Description: Spotify fetcher — obtains synced lyrics via Spotify's internal co
"""
import httpx
from typing import Optional
from loguru import logger
from .base import BaseFetcher
from .base import BaseFetcher, FetchResult
from ..authenticators.spotify import SpotifyAuthenticator, SPOTIFY_BASE_HEADERS
from ..models import TrackMeta, LyricResult, CacheStatus
from ..lrc import LRCData
from ..config import HTTP_TIMEOUT, TTL_NOT_FOUND, TTL_NETWORK_ERROR
from ..config import HTTP_TIMEOUT, TTL_NOT_FOUND
_SPOTIFY_LYRICS_URL = "https://spclient.wg.spotify.com/color-lyrics/v2/track/"
@@ -46,19 +45,17 @@ class SpotifyFetcher(BaseFetcher):
continue
return False
async def fetch(
self, track: TrackMeta, bypass_cache: bool = False
) -> Optional[LyricResult]:
async def fetch(self, track: TrackMeta, bypass_cache: bool = False) -> FetchResult:
if not track.trackid:
logger.debug("Spotify: skipped — no trackid in metadata")
return None
return FetchResult()
logger.info(f"Spotify: fetching lyrics for trackid={track.trackid}")
token = await self.auth.authenticate()
if not token:
logger.error("Spotify: cannot fetch lyrics without a token")
return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)
return FetchResult.from_network_error()
url = f"{_SPOTIFY_LYRICS_URL}{track.trackid}?format=json&vocalRemoval=false&market=from_token"
headers = {
@@ -73,21 +70,17 @@ class SpotifyFetcher(BaseFetcher):
if res.status_code == 404:
logger.debug(f"Spotify: 404 for trackid={track.trackid}")
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
return FetchResult.from_not_found()
if res.status_code != 200:
logger.error(f"Spotify: lyrics API returned {res.status_code}")
return LyricResult(
status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR
)
return FetchResult.from_network_error()
data = res.json()
if not isinstance(data, dict) or "lyrics" not in data:
logger.error("Spotify: unexpected lyrics response structure")
return LyricResult(
status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR
)
return FetchResult.from_network_error()
lyrics_data = data["lyrics"]
sync_type = lyrics_data.get("syncType", "")
@@ -95,7 +88,7 @@ class SpotifyFetcher(BaseFetcher):
if not isinstance(lines, list) or len(lines) == 0:
logger.debug("Spotify: response contained no lyric lines")
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
return FetchResult.from_not_found()
is_synced = sync_type == "LINE_SYNCED" and self._is_truly_synced(lines)
@@ -122,8 +115,25 @@ class SpotifyFetcher(BaseFetcher):
)
logger.info(f"Spotify: got {status.value} lyrics ({len(lrc_lines)} lines)")
return LyricResult(status=status, lyrics=content, source=self.source_name)
not_found = LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
if is_synced:
return FetchResult(
synced=LyricResult(
status=CacheStatus.SUCCESS_SYNCED,
lyrics=content,
source=self.source_name,
),
unsynced=not_found,
)
return FetchResult(
synced=not_found,
unsynced=LyricResult(
status=CacheStatus.SUCCESS_UNSYNCED,
lyrics=content,
source=self.source_name,
),
)
except Exception as e:
logger.error(f"Spotify: lyrics fetch failed: {e}")
return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)
return FetchResult.from_network_error()