chore: switch to src layout
This commit is contained in:
@@ -0,0 +1,100 @@
|
||||
"""
|
||||
Author: Uyanide pywang0608@foxmail.com
|
||||
Date: 2026-03-25 02:33:26
|
||||
Description: Fetcher pipeline — registry and types.
|
||||
"""
|
||||
|
||||
from typing import Literal, Optional
|
||||
from loguru import logger
|
||||
|
||||
from .base import BaseFetcher
|
||||
from .local import LocalFetcher
|
||||
from .cache_search import CacheSearchFetcher
|
||||
from .spotify import SpotifyFetcher
|
||||
from .lrclib import LrclibFetcher
|
||||
from .lrclib_search import LrclibSearchFetcher
|
||||
from .musixmatch import MusixmatchFetcher, MusixmatchSpotifyFetcher
|
||||
from .netease import NeteaseFetcher
|
||||
from .qqmusic import QQMusicFetcher
|
||||
from ..authenticators import (
|
||||
BaseAuthenticator,
|
||||
SpotifyAuthenticator,
|
||||
MusixmatchAuthenticator,
|
||||
QQMusicAuthenticator,
|
||||
)
|
||||
from ..cache import CacheEngine
|
||||
from ..models import TrackMeta
|
||||
|
||||
# Closed set of fetcher source names; used both as dict keys in
# create_fetchers() and as user-facing method identifiers.
FetcherMethodType = Literal[
    "local",
    "cache-search",
    "spotify",
    "lrclib",
    "musixmatch-spotify",
    "lrclib-search",
    "netease",
    "qqmusic",
    "musixmatch",
]


# Fetchers within a group run in parallel; groups run sequentially.
# A group that produces any trusted and synced result stops the pipeline.
# Ordering encodes trust: local files first, then caches, then remote
# sources from most to least preferred.
_FETCHER_GROUPS: list[list[FetcherMethodType]] = [
    ["local"],
    ["cache-search"],
    ["spotify"],
    ["lrclib", "musixmatch-spotify"],
    ["lrclib-search", "musixmatch"],
    ["netease", "qqmusic"],
]
|
||||
|
||||
|
||||
def create_fetchers(
    cache: CacheEngine,
    authenticators: dict[str, BaseAuthenticator],
) -> dict[FetcherMethodType, BaseFetcher]:
    """Instantiate all fetchers. Returns a dict keyed by source name.

    Args:
        cache: Cache engine shared with cache-aware fetchers.
        authenticators: Authenticators keyed by service name; must contain
            "spotify", "musixmatch" and "qqmusic".

    Raises:
        KeyError: If a required authenticator is missing.
    """
    # Index all three authenticators directly so a missing entry fails
    # uniformly with KeyError.  The previous code used .get("qqmusic") and
    # then asserted on the result, so a missing qqmusic authenticator
    # surfaced as a confusing AssertionError on None — or, under
    # ``python -O`` (asserts stripped), silently passed None downstream.
    spotify_auth = authenticators["spotify"]
    mxm_auth = authenticators["musixmatch"]
    qqmusic_auth = authenticators["qqmusic"]
    # Narrow the declared BaseAuthenticator type for the checker; these are
    # internal invariants, not input validation.
    assert isinstance(spotify_auth, SpotifyAuthenticator)
    assert isinstance(mxm_auth, MusixmatchAuthenticator)
    assert isinstance(qqmusic_auth, QQMusicAuthenticator)
    fetchers: dict[FetcherMethodType, BaseFetcher] = {
        "local": LocalFetcher(),
        "cache-search": CacheSearchFetcher(cache),
        "spotify": SpotifyFetcher(spotify_auth),
        "lrclib": LrclibFetcher(),
        "musixmatch-spotify": MusixmatchSpotifyFetcher(mxm_auth),
        "lrclib-search": LrclibSearchFetcher(),
        "netease": NeteaseFetcher(),
        "qqmusic": QQMusicFetcher(qqmusic_auth),
        "musixmatch": MusixmatchFetcher(mxm_auth),
    }
    return fetchers
|
||||
|
||||
|
||||
def build_plan(
    fetchers: dict[FetcherMethodType, BaseFetcher],
    track: TrackMeta,
    force_method: Optional[FetcherMethodType] = None,
) -> list[list[BaseFetcher]]:
    """Return the fetch plan as a list of groups (each group runs in parallel).

    When ``force_method`` is given, the plan is exactly that one fetcher
    (or empty if the name is unknown).  Otherwise groups come from
    ``_FETCHER_GROUPS``, keeping only fetchers that exist and report
    themselves available for this track; empty groups are dropped.
    """
    if force_method:
        forced = fetchers.get(force_method)
        if forced is None:
            logger.error(f"Unknown method: {force_method}")
            return []
        return [[forced]]

    plan: list[list[BaseFetcher]] = []
    for methods in _FETCHER_GROUPS:
        group: list[BaseFetcher] = []
        for method in methods:
            fetcher = fetchers.get(method)
            if fetcher is not None and fetcher.is_available(track):
                group.append(fetcher)
        if group:
            plan.append(group)

    logger.debug(f"Fetch plan: {[[f.source_name for f in g] for g in plan]}")
    return plan
|
||||
@@ -0,0 +1,35 @@
|
||||
"""
|
||||
Author: Uyanide pywang0608@foxmail.com
|
||||
Date: 2026-03-25 02:33:26
|
||||
Description: Base fetcher class and common interfaces.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Optional
|
||||
|
||||
from ..models import TrackMeta, LyricResult
|
||||
|
||||
|
||||
class BaseFetcher(ABC):
    """Abstract interface every lyric fetcher implements."""

    @property
    @abstractmethod
    def source_name(self) -> str:
        """Identifier of this lyric source (e.g. cache key / log tag)."""

    @property
    def self_cached(self) -> bool:
        """True if this fetcher manages its own cache (skip per-source cache check)."""
        return False

    @abstractmethod
    def is_available(self, track: TrackMeta) -> bool:
        """Whether this fetcher can run for *track* (e.g. required metadata present)."""

    @abstractmethod
    async def fetch(
        self, track: TrackMeta, bypass_cache: bool = False
    ) -> Optional[LyricResult]:
        """Fetch lyrics for *track*; return None when nothing could be fetched."""
|
||||
@@ -0,0 +1,98 @@
|
||||
"""
|
||||
Author: Uyanide pywang0608@foxmail.com
|
||||
Date: 2026-03-28 05:57:46
|
||||
Description: Cache-search fetcher — cross-album fuzzy lookup in the local cache.
|
||||
|
||||
Searches existing cache entries by artist + title with fuzzy normalization,
|
||||
ignoring album and source. Useful when the same track appears on different
|
||||
albums or is played from different players.
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
from loguru import logger
|
||||
|
||||
|
||||
from .base import BaseFetcher
|
||||
from .selection import SearchCandidate, select_best
|
||||
from ..models import TrackMeta, LyricResult, CacheStatus
|
||||
from ..cache import CacheEngine
|
||||
from ..lrc import LRCData
|
||||
|
||||
|
||||
class CacheSearchFetcher(BaseFetcher):
    """Cross-album fuzzy lookup in the local cache.

    Matches cache entries by artist + title with fuzzy normalization,
    ignoring album and source — useful when the same track appears on
    several albums or is played from different players.
    """

    def __init__(self, cache: CacheEngine) -> None:
        self._cache = cache

    @property
    def source_name(self) -> str:
        return "cache-search"

    @property
    def self_cached(self) -> bool:
        # This fetcher *is* the cache; no per-source cache check needed.
        return True

    def is_available(self, track: TrackMeta) -> bool:
        return bool(track.title)

    async def fetch(
        self, track: TrackMeta, bypass_cache: bool = False
    ) -> Optional[LyricResult]:
        """Look the track up in the cache; exact match first, then fuzzy."""
        if bypass_cache:
            logger.debug("Cache-search: bypassed by caller")
            return None
        if not track.title:
            logger.debug("Cache-search: skipped — no title")
            return None

        # Fast path: exact artist+title+album match, single SQL query.
        exact = self._cache.find_best_positive(track)
        if exact:
            logger.info(f"Cache-search: exact hit ({exact.status.value})")
            return exact

        # Slow path: fuzzy cross-album search by title + duration.
        rows = self._cache.search_by_meta(
            title=track.title,
            length=track.length,
        )
        if not rows:
            logger.debug(f"Cache-search: no match for {track.display_name()}")
            return None

        # Score only rows that actually carry lyrics.
        scored: list[SearchCandidate] = []
        for row in rows:
            if not row.get("lyrics"):
                continue
            scored.append(
                SearchCandidate(
                    item=row,
                    duration_ms=float(row["length"]) if row.get("length") else None,
                    is_synced=row.get("status") == CacheStatus.SUCCESS_SYNCED.value,
                    title=row.get("title"),
                    artist=row.get("artist"),
                    album=row.get("album"),
                )
            )

        best, confidence = select_best(
            scored,
            track.length,
            title=track.title,
            artist=track.artist,
            album=track.album,
        )
        if not best:
            return None

        status = CacheStatus(best["status"])
        logger.info(
            f"Cache-search: fuzzy hit from [{best.get('source')}] "
            f"album={best.get('album')!r} ({status.value}, confidence={confidence:.0f})"
        )
        return LyricResult(
            status=status,
            lyrics=LRCData(best["lyrics"]),
            source=self.source_name,
            confidence=confidence,
        )
|
||||
@@ -0,0 +1,101 @@
|
||||
"""
|
||||
Author: Uyanide pywang0608@foxmail.com
|
||||
Date: 2026-03-26 02:08:41
|
||||
Description: Local fetcher — reads lyrics from .lrc sidecar files or embedded audio metadata.
|
||||
Priority:
|
||||
1. Same-directory .lrc file (e.g. /path/to/track.lrc)
|
||||
2. Embedded lyrics in audio metadata (FLAC, MP3 USLT/SYLT tags)
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
from loguru import logger
|
||||
from mutagen._file import File
|
||||
from mutagen.flac import FLAC
|
||||
|
||||
from .base import BaseFetcher
|
||||
from ..models import TrackMeta, LyricResult
|
||||
from ..lrc import get_audio_path, get_sidecar_path, LRCData
|
||||
|
||||
|
||||
class LocalFetcher(BaseFetcher):
    """Read lyrics from the local filesystem for locally-played tracks.

    Priority:
      1. Same-directory .lrc sidecar file (e.g. /path/to/track.lrc)
      2. Lyrics embedded in audio metadata (FLAC vorbis tags, ID3 USLT/SYLT)
    """

    @property
    def source_name(self) -> str:
        return "local"

    def is_available(self, track: TrackMeta) -> bool:
        # Only meaningful for tracks backed by a local file.
        return track.is_local

    async def fetch(
        self, track: TrackMeta, bypass_cache: bool = False
    ) -> Optional[LyricResult]:
        """Attempt to read lyrics from local filesystem.

        ``bypass_cache`` is accepted for interface compatibility but has no
        effect here — there is nothing cached to bypass.
        Returns None when neither the sidecar nor embedded tags yield lyrics.
        """
        if not track.is_local or not track.url:
            return None

        # Resolve the track URL to a filesystem path; existence is checked
        # later, only where it matters (sidecar may exist without the audio).
        audio_path = get_audio_path(track.url, ensure_exists=False)
        if not audio_path:
            logger.debug(f"Local: audio URL is not a valid file path: {track.url}")
            return None

        # 1) Sidecar .lrc next to the audio file (only returned if it exists).
        lrc_path = get_sidecar_path(
            track.url, ensure_audio_exists=False, ensure_exists=True
        )
        if lrc_path:
            try:
                with open(lrc_path, "r", encoding="utf-8") as f:
                    content = f.read().strip()
                    if content:
                        lrc = LRCData(content)
                        status = lrc.detect_sync_status()
                        logger.info(
                            f"Local: found .lrc sidecar ({status.value}) for {audio_path.name}"
                        )
                        return LyricResult(
                            status=status,
                            lyrics=lrc,
                            source=self.source_name,
                        )
            except Exception as e:
                # A broken sidecar should not prevent the embedded fallback.
                logger.error(f"Local: error reading {lrc_path}: {e}")
        else:
            logger.debug(f"Local: no .lrc sidecar found for {audio_path}")

        # 2) Embedded metadata — requires the audio file itself to exist.
        if not audio_path.exists():
            logger.debug(f"Local: audio file does not exist: {audio_path}")
            return None
        try:
            # mutagen.File sniffs the container format; None if unrecognized.
            audio = File(audio_path)
            if audio is not None:
                lyrics = None

                if isinstance(audio, FLAC):
                    # FLAC stores lyrics in vorbis comment tags
                    # (tag values come back as lists; [None] keeps [0] safe).
                    lyrics = (
                        audio.get("lyrics") or audio.get("unsynclyrics") or [None]
                    )[0]
                elif hasattr(audio, "tags") and audio.tags:
                    # MP3 / other: look for USLT or SYLT ID3 frames
                    # (keys are suffixed with language/description, hence
                    # the prefix match; first matching frame wins).
                    for key in audio.tags.keys():
                        if key.startswith("USLT") or key.startswith("SYLT"):
                            lyrics = str(audio.tags[key])
                            break

                if lyrics:
                    lrc = LRCData(lyrics)
                    status = lrc.detect_sync_status()
                    logger.info(
                        f"Local: found embedded lyrics ({status.value}) for {audio_path.name}"
                    )
                    return LyricResult(
                        status=status,
                        lyrics=lrc,
                        # Distinct source label so cache entries reveal origin.
                        source=f"{self.source_name} (embedded)",
                    )
                else:
                    logger.debug("Local: no embedded lyrics found")
        except Exception as e:
            logger.error(f"Local: error reading metadata for {audio_path}: {e}")

        logger.debug(f"Local: no lyrics found for {audio_path}")
        return None
|
||||
@@ -0,0 +1,102 @@
|
||||
"""
|
||||
Author: Uyanide pywang0608@foxmail.com
|
||||
Date: 2026-03-25 05:23:38
|
||||
Description: LRCLIB fetcher — queries lrclib.net for synced/plain lyrics.
|
||||
Requires complete track metadata (artist, title, album, duration).
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
import httpx
|
||||
from loguru import logger
|
||||
from urllib.parse import urlencode
|
||||
|
||||
from .base import BaseFetcher
|
||||
from ..models import TrackMeta, LyricResult, CacheStatus
|
||||
from ..lrc import LRCData
|
||||
from ..config import (
|
||||
HTTP_TIMEOUT,
|
||||
TTL_UNSYNCED,
|
||||
TTL_NOT_FOUND,
|
||||
TTL_NETWORK_ERROR,
|
||||
UA_LRX,
|
||||
)
|
||||
|
||||
_LRCLIB_API_URL = "https://lrclib.net/api/get"
|
||||
|
||||
|
||||
class LrclibFetcher(BaseFetcher):
    """Exact-match lookup against lrclib.net /api/get.

    Needs complete track metadata (artist, title, album, duration).
    """

    @property
    def source_name(self) -> str:
        return "lrclib"

    def is_available(self, track: TrackMeta) -> bool:
        return track.is_complete

    async def fetch(
        self, track: TrackMeta, bypass_cache: bool = False
    ) -> Optional[LyricResult]:
        """Fetch lyrics from LRCLIB. Requires complete metadata."""
        if not track.is_complete:
            logger.debug("LRCLIB: skipped — incomplete metadata")
            return None

        # LRCLIB expects the duration in seconds.
        params = {
            "track_name": track.title,
            "artist_name": track.artist,
            "album_name": track.album,
            "duration": track.length / 1000.0 if track.length else 0,
        }
        url = f"{_LRCLIB_API_URL}?{urlencode(params)}"
        logger.info(f"LRCLIB: fetching lyrics for {track.display_name()}")

        try:
            async with httpx.AsyncClient(timeout=HTTP_TIMEOUT) as client:
                resp = await client.get(url, headers={"User-Agent": UA_LRX})

                # 404 is a definitive "no such track" — cacheable miss.
                if resp.status_code == 404:
                    logger.debug(f"LRCLIB: not found for {track.display_name()}")
                    return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
                if resp.status_code != 200:
                    logger.error(f"LRCLIB: API returned {resp.status_code}")
                    return LyricResult(
                        status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR
                    )

                payload = resp.json()
                if not isinstance(payload, dict):
                    logger.error(f"LRCLIB: unexpected response type: {type(payload).__name__}")
                    return LyricResult(
                        status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR
                    )

                synced_text = payload.get("syncedLyrics")
                plain_text = payload.get("plainLyrics")

                # Prefer synced lyrics; fall back to plain (shorter TTL).
                if isinstance(synced_text, str) and synced_text.strip():
                    lyrics = LRCData(synced_text)
                    logger.info(f"LRCLIB: got synced lyrics ({len(lyrics)} lines)")
                    return LyricResult(
                        status=CacheStatus.SUCCESS_SYNCED,
                        lyrics=lyrics,
                        source=self.source_name,
                    )
                if isinstance(plain_text, str) and plain_text.strip():
                    lyrics = LRCData(plain_text)
                    logger.info(f"LRCLIB: got unsynced lyrics ({len(lyrics)} lines)")
                    return LyricResult(
                        status=CacheStatus.SUCCESS_UNSYNCED,
                        lyrics=lyrics,
                        source=self.source_name,
                        ttl=TTL_UNSYNCED,
                    )

                logger.debug(f"LRCLIB: empty response for {track.display_name()}")
                return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)

        except httpx.HTTPError as e:
            logger.error(f"LRCLIB: HTTP error: {e}")
            return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)
        except Exception as e:
            logger.error(f"LRCLIB: unexpected error: {e}")
            return None
|
||||
@@ -0,0 +1,184 @@
|
||||
"""
|
||||
Author: Uyanide pywang0608@foxmail.com
|
||||
Date: 2026-03-25 05:30:50
|
||||
Description: LRCLIB search fetcher — fuzzy search via lrclib.net /api/search.
|
||||
Used when metadata is incomplete (no album or duration) but title is available.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import httpx
|
||||
from typing import Optional
|
||||
from loguru import logger
|
||||
from urllib.parse import urlencode
|
||||
|
||||
from .base import BaseFetcher
|
||||
from .selection import SearchCandidate, select_best
|
||||
from ..models import TrackMeta, LyricResult, CacheStatus
|
||||
from ..lrc import LRCData
|
||||
from ..config import (
|
||||
HTTP_TIMEOUT,
|
||||
TTL_UNSYNCED,
|
||||
TTL_NOT_FOUND,
|
||||
TTL_NETWORK_ERROR,
|
||||
UA_LRX,
|
||||
)
|
||||
|
||||
_LRCLIB_SEARCH_URL = "https://lrclib.net/api/search"
|
||||
|
||||
|
||||
class LrclibSearchFetcher(BaseFetcher):
    """Fuzzy search via lrclib.net /api/search.

    Used when metadata is incomplete (no album or duration) but a title is
    available.  Fires several query variants concurrently, de-duplicates
    the results, then picks the best candidate by confidence scoring.
    """

    @property
    def source_name(self) -> str:
        return "lrclib-search"

    def is_available(self, track: TrackMeta) -> bool:
        # Title is the only hard requirement; artist/album merely narrow it.
        return bool(track.title)

    def _build_queries(self, track: TrackMeta) -> list[dict[str, str]]:
        """Build up to 4 query param sets, from most specific to least.

        1. title + artist + album (if all present)
        2. title + artist (if artist present)
        3. title + album (if album present)
        4. title only
        """
        assert track.title is not None
        title = track.title
        queries: list[dict[str, str]] = []

        if track.artist and track.album:
            queries.append(
                {
                    "track_name": title,
                    "artist_name": track.artist,
                    "album_name": track.album,
                }
            )
        if track.artist:
            queries.append({"track_name": title, "artist_name": track.artist})
        if track.album:
            queries.append({"track_name": title, "album_name": track.album})
        queries.append({"track_name": title})

        return queries

    async def fetch(
        self, track: TrackMeta, bypass_cache: bool = False
    ) -> Optional[LyricResult]:
        """Search LRCLIB with all query variants and return the best match.

        Returns NETWORK_ERROR only when every variant failed and produced no
        candidates; otherwise a partial failure is tolerated.
        """
        if not track.title:
            logger.debug("LRCLIB-search: skipped — no title")
            return None

        queries = self._build_queries(track)
        logger.info(f"LRCLIB-search: searching for {track.display_name()}")

        seen_ids: set[int] = set()
        candidates: list[dict] = []
        had_error = False

        try:
            async with httpx.AsyncClient(timeout=HTTP_TIMEOUT) as client:

                # One query variant -> (items, had_error); errors are reported
                # per-variant instead of aborting the whole batch.
                async def _query(params: dict[str, str]) -> tuple[list[dict], bool]:
                    url = f"{_LRCLIB_SEARCH_URL}?{urlencode(params)}"
                    logger.debug(f"LRCLIB-search: query {params}")
                    try:
                        resp = await client.get(url, headers={"User-Agent": UA_LRX})
                    except httpx.HTTPError as e:
                        logger.error(f"LRCLIB-search: HTTP error: {e}")
                        return [], True
                    if resp.status_code != 200:
                        logger.error(f"LRCLIB-search: API returned {resp.status_code}")
                        return [], True
                    data = resp.json()
                    if not isinstance(data, list):
                        return [], False
                    return [item for item in data if isinstance(item, dict)], False

                # All variants run concurrently on one shared client; gather
                # preserves query order, so more-specific results come first.
                all_results = await asyncio.gather(*(_query(p) for p in queries))

            # Merge results, de-duplicating by LRCLIB's "id" field.  Items
            # without an id are kept unconditionally.
            for items, err in all_results:
                if err:
                    had_error = True
                for item in items:
                    item_id = item.get("id")
                    if item_id is not None and item_id in seen_ids:
                        continue
                    if item_id is not None:
                        seen_ids.add(item_id)
                    candidates.append(item)

            if not candidates:
                if had_error:
                    return LyricResult(
                        status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR
                    )
                logger.debug(f"LRCLIB-search: no results for {track.display_name()}")
                return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)

            logger.debug(
                f"LRCLIB-search: got {len(candidates)} unique candidates "
                f"from {len(queries)} queries"
            )

            # Map raw API items to scoring candidates (duration is seconds
            # in the API; SearchCandidate expects milliseconds).
            mapped = [
                SearchCandidate(
                    item=item,
                    duration_ms=item["duration"] * 1000
                    if isinstance(item.get("duration"), (int, float))
                    else None,
                    is_synced=isinstance(item.get("syncedLyrics"), str)
                    and bool(item["syncedLyrics"].strip()),
                    title=item.get("trackName"),
                    artist=item.get("artistName"),
                    album=item.get("albumName"),
                )
                for item in candidates
            ]
            best, confidence = select_best(
                mapped,
                track.length,
                title=track.title,
                artist=track.artist,
                album=track.album,
            )
            if best is None:
                logger.debug("LRCLIB-search: no valid candidate found")
                return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)

            synced = best.get("syncedLyrics")
            unsynced = best.get("plainLyrics")

            # Prefer synced lyrics; plain lyrics get a shorter TTL.
            if isinstance(synced, str) and synced.strip():
                lyrics = LRCData(synced)
                logger.info(
                    f"LRCLIB-search: got synced lyrics ({len(lyrics)} lines, confidence={confidence:.0f})"
                )
                return LyricResult(
                    status=CacheStatus.SUCCESS_SYNCED,
                    lyrics=lyrics,
                    source=self.source_name,
                    confidence=confidence,
                )
            elif isinstance(unsynced, str) and unsynced.strip():
                lyrics = LRCData(unsynced)
                logger.info(
                    f"LRCLIB-search: got unsynced lyrics ({len(lyrics)} lines, confidence={confidence:.0f})"
                )
                return LyricResult(
                    status=CacheStatus.SUCCESS_UNSYNCED,
                    lyrics=lyrics,
                    source=self.source_name,
                    ttl=TTL_UNSYNCED,
                    confidence=confidence,
                )
            else:
                logger.debug("LRCLIB-search: best candidate has empty lyrics")
                return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)

        except httpx.HTTPError as e:
            logger.error(f"LRCLIB-search: HTTP error: {e}")
            return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)
        except Exception as e:
            logger.error(f"LRCLIB-search: unexpected error: {e}")
            return None
|
||||
@@ -0,0 +1,295 @@
|
||||
"""
|
||||
Author: Uyanide pywang0608@foxmail.com
|
||||
Date: 2026-04-04 15:28:34
|
||||
Description: Musixmatch fetchers (desktop API, anonymous or usertoken auth).
|
||||
|
||||
Uses the Musixmatch desktop API (apic-desktop.musixmatch.com).
|
||||
Token and all HTTP calls are managed by MusixmatchAuthenticator.
|
||||
|
||||
Two fetchers:
|
||||
musixmatch-spotify — direct lookup by Spotify track ID (exact, no search)
|
||||
musixmatch — metadata search + best-candidate fallback
|
||||
"""
|
||||
|
||||
import json
|
||||
from typing import Optional
|
||||
from loguru import logger
|
||||
|
||||
from .base import BaseFetcher
|
||||
from .selection import SearchCandidate, select_best
|
||||
from ..authenticators.musixmatch import MusixmatchAuthenticator
|
||||
from ..lrc import LRCData
|
||||
from ..models import CacheStatus, LyricResult, TrackMeta
|
||||
from ..config import TTL_NETWORK_ERROR, TTL_NOT_FOUND
|
||||
|
||||
# Desktop API endpoints; one macro call returns lyrics + richsync together.
_MUSIXMATCH_MACRO_URL = "https://apic-desktop.musixmatch.com/ws/1.1/macro.subtitles.get"
_MUSIXMATCH_SEARCH_URL = "https://apic-desktop.musixmatch.com/ws/1.1/track.search"

# Macro-specific params (format/app_id injected by authenticator)
_MXM_MACRO_PARAMS = {
    "namespace": "lyrics_richsynched",
    "subtitle_format": "mxm",
    # Ask the macro to also resolve word-level richsync when available.
    "optional_calls": "track.richsync",
}
|
||||
|
||||
|
||||
def _format_ts(s: float) -> str:
|
||||
mm = int(s) // 60
|
||||
ss = int(s) % 60
|
||||
cs = min(round((s % 1) * 100), 99)
|
||||
return f"[{mm:02d}:{ss:02d}.{cs:02d}]"
|
||||
|
||||
|
||||
def _parse_richsync(body: str) -> Optional[str]:
|
||||
"""Parse richsync JSON body → LRC text. Each entry: {"ts": float, "x": str}."""
|
||||
try:
|
||||
data = json.loads(body)
|
||||
if not isinstance(data, list):
|
||||
return None
|
||||
lines = []
|
||||
for entry in data:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
ts = entry.get("ts")
|
||||
x = entry.get("x")
|
||||
if not isinstance(ts, (int, float)) or not isinstance(x, str):
|
||||
continue
|
||||
lines.append(f"{_format_ts(float(ts))}{x}")
|
||||
return "\n".join(lines) if lines else None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _parse_subtitle(body: str) -> Optional[str]:
|
||||
"""Parse subtitle JSON body → LRC text. Each entry: {"text": str, "time": {"total": float}}."""
|
||||
try:
|
||||
data = json.loads(body)
|
||||
if not isinstance(data, list):
|
||||
return None
|
||||
lines = []
|
||||
for entry in data:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
text = entry.get("text")
|
||||
time_obj = entry.get("time")
|
||||
if not isinstance(text, str) or not isinstance(time_obj, dict):
|
||||
continue
|
||||
total = time_obj.get("total")
|
||||
if not isinstance(total, (int, float)):
|
||||
continue
|
||||
lines.append(f"{_format_ts(float(total))}{text}")
|
||||
return "\n".join(lines) if lines else None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
async def _fetch_macro(
    auth: MusixmatchAuthenticator,
    params: dict,
) -> Optional[LRCData]:
    """Call macro.subtitles.get via auth.get_json.

    Returns LRCData (richsync preferred over subtitle), or None when no usable
    lyrics are found. Raises on HTTP/network errors.

    Args:
        auth: Authenticator that owns the token and performs the HTTP call.
        params: Track-identifying params (e.g. commontrack_id or
            track_spotify_id) merged over _MXM_MACRO_PARAMS.
    """
    # Log keys only — values may contain user track metadata.
    logger.debug(f"Musixmatch: macro call with {list(params.keys())}")
    data = await auth.get_json(_MUSIXMATCH_MACRO_URL, {**_MXM_MACRO_PARAMS, **params})
    if data is None:
        return None

    # Musixmatch returns body=[] (not {}) when the track is not found
    body = data.get("message", {}).get("body", {})
    if not isinstance(body, dict):
        return None
    macro_calls = body.get("macro_calls", {})
    if not isinstance(macro_calls, dict):
        return None

    # Prefer richsync (word-level timing)
    richsync_msg = macro_calls.get("track.richsync.get", {}).get("message", {})
    if (
        isinstance(richsync_msg, dict)
        and richsync_msg.get("header", {}).get("status_code") == 200
    ):
        richsync_body = (
            richsync_msg.get("body", {}).get("richsync", {}).get("richsync_body")
        )
        if isinstance(richsync_body, str):
            lrc_text = _parse_richsync(richsync_body)
            if lrc_text:
                lrc = LRCData(lrc_text)
                # NOTE(review): relies on LRCData being falsy when it parses
                # to no usable lines — confirm against LRCData.__bool__.
                if lrc:
                    logger.debug("Musixmatch: got richsync lyrics")
                    return lrc

    # Fall back to subtitle (line-level timing)
    subtitle_msg = macro_calls.get("track.subtitles.get", {}).get("message", {})
    if (
        isinstance(subtitle_msg, dict)
        and subtitle_msg.get("header", {}).get("status_code") == 200
    ):
        subtitle_list = subtitle_msg.get("body", {}).get("subtitle_list", [])
        if isinstance(subtitle_list, list) and subtitle_list:
            # Only the first subtitle entry is considered.
            subtitle_body = subtitle_list[0].get("subtitle", {}).get("subtitle_body")
            if isinstance(subtitle_body, str):
                lrc_text = _parse_subtitle(subtitle_body)
                if lrc_text:
                    lrc = LRCData(lrc_text)
                    if lrc:
                        logger.debug("Musixmatch: got subtitle lyrics")
                        return lrc

    logger.debug("Musixmatch: no usable lyrics in macro response")
    return None
|
||||
|
||||
|
||||
class MusixmatchSpotifyFetcher(BaseFetcher):
    """Direct Musixmatch lookup by Spotify track ID — no search, one request."""

    def __init__(self, auth: MusixmatchAuthenticator) -> None:
        self.auth = auth

    @property
    def source_name(self) -> str:
        return "musixmatch-spotify"

    def is_available(self, track: TrackMeta) -> bool:
        # Needs a Spotify track id and an authenticator not in cooldown.
        return bool(track.trackid) and not self.auth.is_cooldown()

    async def fetch(
        self, track: TrackMeta, bypass_cache: bool = False
    ) -> Optional[LyricResult]:
        """Resolve lyrics via the macro endpoint keyed by Spotify track id."""
        logger.info(f"Musixmatch-Spotify: fetching lyrics for {track.display_name()}")

        try:
            lyrics = await _fetch_macro(
                self.auth,
                {"track_spotify_id": track.trackid},  # type: ignore[dict-item]
            )
        except AttributeError:
            # Malformed/partial payload shape — treat as a miss, not an error.
            return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
        except Exception as e:
            logger.error(f"Musixmatch-Spotify: fetch failed: {e}")
            return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)

        if lyrics is None:
            logger.debug(
                f"Musixmatch-Spotify: no lyrics found for {track.display_name()}"
            )
            return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)

        # _fetch_macro only ever returns timed (synced) lyrics.
        logger.info(f"Musixmatch-Spotify: got SUCCESS_SYNCED lyrics ({len(lyrics)} lines)")
        return LyricResult(
            status=CacheStatus.SUCCESS_SYNCED,
            lyrics=lyrics,
            source=self.source_name,
        )
|
||||
|
||||
|
||||
class MusixmatchFetcher(BaseFetcher):
    """Metadata search + best-candidate lyric fetch."""

    def __init__(self, auth: MusixmatchAuthenticator) -> None:
        self.auth = auth

    @property
    def source_name(self) -> str:
        return "musixmatch"

    @property
    def requires_auth(self) -> str:
        # NOTE(review): not part of BaseFetcher's visible interface;
        # presumably consumed elsewhere to map fetcher -> authenticator
        # name — confirm against callers.
        return "musixmatch"

    def is_available(self, track: TrackMeta) -> bool:
        return bool(track.title) and not self.auth.is_cooldown()

    async def _search(self, track: TrackMeta) -> tuple[Optional[int], float]:
        """Search for track metadata. Raises on network/HTTP errors.

        Returns (commontrack_id, confidence) of the best candidate, or
        (None, 0.0) when nothing suitable is found.
        """
        params: dict = {
            "q_track": track.title or "",
            "page_size": "10",
            "f_has_lyrics": "1",  # only tracks that actually have lyrics
        }
        if track.artist:
            params["q_artist"] = track.artist
        if track.album:
            params["q_album"] = track.album

        logger.debug(f"Musixmatch: searching for '{track.display_name()}'")
        data = await self.auth.get_json(_MUSIXMATCH_SEARCH_URL, params)
        if data is None:
            return None, 0.0

        track_list = data.get("message", {}).get("body", {}).get("track_list", [])
        if not isinstance(track_list, list) or not track_list:
            logger.debug("Musixmatch: search returned 0 results")
            return None, 0.0

        logger.debug(f"Musixmatch: search returned {len(track_list)} candidates")

        # The walrus in the filter binds t to the nested "track" dict; the
        # element expression runs only after all conditions pass, so t is
        # always bound there.  Instrumental tracks are dropped.
        candidates = [
            SearchCandidate(
                item=int(t["commontrack_id"]),
                duration_ms=(
                    # API reports seconds; scoring expects milliseconds.
                    float(t["track_length"]) * 1000 if t.get("track_length") else None
                ),
                is_synced=bool(t.get("has_subtitles") or t.get("has_richsync")),
                title=t.get("track_name"),
                artist=t.get("artist_name"),
                album=t.get("album_name"),
            )
            for item in track_list
            if isinstance(item, dict)
            and isinstance(t := item.get("track", {}), dict)
            and isinstance(t.get("commontrack_id"), int)
            and not t.get("instrumental")
        ]

        best_id, confidence = select_best(
            candidates,
            track.length,
            title=track.title,
            artist=track.artist,
            album=track.album,
        )
        if best_id is not None:
            logger.debug(f"Musixmatch: best candidate id={best_id} ({confidence:.0f})")
        else:
            logger.debug("Musixmatch: no suitable candidate found")
        return best_id, confidence

    async def fetch(
        self, track: TrackMeta, bypass_cache: bool = False
    ) -> Optional[LyricResult]:
        """Search Musixmatch, then fetch lyrics for the best candidate."""
        logger.info(f"Musixmatch: fetching lyrics for {track.display_name()}")

        try:
            commontrack_id, confidence = await self._search(track)
            if commontrack_id is None:
                logger.debug(f"Musixmatch: no match found for {track.display_name()}")
                return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)

            lrc = await _fetch_macro(
                self.auth,
                {"commontrack_id": str(commontrack_id)},
            )
        except AttributeError:
            # Malformed/partial payload shape — treat as a miss, not an error.
            return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
        except Exception as e:
            logger.error(f"Musixmatch: fetch failed: {e}")
            return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)

        if lrc is None:
            logger.debug(f"Musixmatch: no lyrics for commontrack_id={commontrack_id}")
            return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)

        # _fetch_macro only ever returns timed (synced) lyrics.
        logger.info(
            f"Musixmatch: got SUCCESS_SYNCED lyrics "
            f"for commontrack_id={commontrack_id} ({len(lrc)} lines)"
        )
        return LyricResult(
            status=CacheStatus.SUCCESS_SYNCED,
            lyrics=lrc,
            source=self.source_name,
            confidence=confidence,
        )
|
||||
@@ -0,0 +1,200 @@
|
||||
"""
|
||||
Author: Uyanide pywang0608@foxmail.com
|
||||
Date: 2026-03-25 11:04:51
|
||||
Description: Netease Cloud Music fetcher.
|
||||
|
||||
Uses the public cloudsearch API for searching and the song/lyric API for
|
||||
retrieving lyrics. No authentication required.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from typing import Optional
|
||||
import httpx
|
||||
from loguru import logger
|
||||
|
||||
from .base import BaseFetcher
|
||||
from .selection import SearchCandidate, select_ranked
|
||||
from ..models import TrackMeta, LyricResult, CacheStatus
|
||||
from ..lrc import LRCData
|
||||
from ..config import (
|
||||
HTTP_TIMEOUT,
|
||||
TTL_NOT_FOUND,
|
||||
TTL_NETWORK_ERROR,
|
||||
MULTI_CANDIDATE_DELAY_S,
|
||||
UA_BROWSER,
|
||||
)
|
||||
|
||||
_NETEASE_SEARCH_URL = "https://music.163.com/api/cloudsearch/pc"
|
||||
_NETEASE_LYRIC_URL = "https://interface3.music.163.com/api/song/lyric"
|
||||
_NETEASE_BASE_HEADERS = {
|
||||
"User-Agent": UA_BROWSER,
|
||||
"Referer": "https://music.163.com/",
|
||||
"Origin": "https://music.163.com",
|
||||
}
|
||||
|
||||
|
||||
class NeteaseFetcher(BaseFetcher):
    """Fetcher backed by Netease Cloud Music's public web APIs.

    Uses the cloudsearch endpoint to find candidate songs, ranks them with
    :func:`select_ranked`, then pulls the LRC body from the song/lyric
    endpoint. No authentication is required.
    """

    @property
    def source_name(self) -> str:
        # Registry key / LyricResult.source value for this backend.
        return "netease"

    def is_available(self, track: TrackMeta) -> bool:
        # A title is the minimum metadata required to attempt a search.
        return bool(track.title)

    async def _search(
        self, track: TrackMeta, limit: int = 10
    ) -> list[tuple[int, float]]:
        """Search Netease and return ranked (song_id, confidence) pairs.

        Best match first. Returns an empty list when the query is empty,
        the response is malformed, nothing matches, or any error occurs
        (errors are logged, never raised).
        """
        query = f"{track.artist or ''} {track.title or ''}".strip()
        if not query:
            return []

        logger.debug(f"Netease: searching for '{query}' (limit={limit})")

        try:
            async with httpx.AsyncClient(timeout=HTTP_TIMEOUT) as client:
                # type=1 selects the "songs" search category.
                resp = await client.post(
                    _NETEASE_SEARCH_URL,
                    headers=_NETEASE_BASE_HEADERS,
                    data={"s": query, "type": "1", "limit": str(limit), "offset": "0"},
                )
                resp.raise_for_status()
                result = resp.json()

            # Defensive shape checks: the payload is untrusted JSON.
            if not isinstance(result, dict):
                logger.error(
                    f"Netease: search returned non-dict: {type(result).__name__}"
                )
                return []

            result_body = result.get("result")
            if not isinstance(result_body, dict):
                logger.debug("Netease: search 'result' field missing or invalid")
                return []

            songs = result_body.get("songs")
            if not isinstance(songs, list) or len(songs) == 0:
                logger.debug("Netease: search returned 0 results")
                return []

            logger.debug(f"Netease: search returned {len(songs)} candidates")

            # Map raw song dicts to SearchCandidate; entries without an
            # integer "id" are dropped by the walrus filter below.
            # "dt" is the duration in milliseconds; "ar" the artist list;
            # "al" the album object.
            candidates = [
                SearchCandidate(
                    item=song_id,
                    duration_ms=float(song["dt"])
                    if isinstance(song.get("dt"), int)
                    else None,
                    title=song.get("name"),
                    artist=", ".join(a.get("name", "") for a in song.get("ar", []))
                    or None,
                    album=(song.get("al") or {}).get("name"),
                )
                for song in songs
                if isinstance(song, dict) and isinstance(song_id := song.get("id"), int)
            ]
            ranked = select_ranked(
                candidates,
                track.length,
                title=track.title,
                artist=track.artist,
                album=track.album,
            )
            if ranked:
                logger.debug(
                    "Netease: top candidates: "
                    + ", ".join(f"id={i} ({c:.0f})" for i, c in ranked)
                )
            else:
                logger.debug("Netease: no suitable candidate found")
            return ranked

        except Exception as e:
            logger.error(f"Netease: search failed: {e}")
            return []

    async def _get_lyric(
        self, song_id: int, confidence: float = 0.0
    ) -> Optional[LyricResult]:
        """Fetch the LRC body for *song_id* and wrap it in a LyricResult.

        *confidence* is passed through unchanged into the result. Status is
        detected from the LRC content itself (synced vs. unsynced); missing
        or empty lyrics yield NOT_FOUND, transport/shape failures yield
        NETWORK_ERROR.
        """
        logger.debug(f"Netease: fetching lyrics for song_id={song_id}")

        try:
            async with httpx.AsyncClient(timeout=HTTP_TIMEOUT) as client:
                # The extra flags select which lyric variants to include —
                # presumably "0" requests the current version of each
                # (translated/romanized/karaoke variants); confirm against
                # the API before changing.
                resp = await client.post(
                    _NETEASE_LYRIC_URL,
                    headers=_NETEASE_BASE_HEADERS,
                    data={
                        "id": str(song_id),
                        "cp": "false",
                        "tv": "0",
                        "lv": "0",
                        "rv": "0",
                        "kv": "0",
                        "yv": "0",
                        "ytv": "0",
                        "yrv": "0",
                    },
                )
                resp.raise_for_status()
                data = resp.json()

            if not isinstance(data, dict):
                logger.error(
                    f"Netease: lyric response is not dict: {type(data).__name__}"
                )
                return LyricResult(
                    status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR
                )

            lrc_obj = data.get("lrc")
            if not isinstance(lrc_obj, dict):
                logger.debug(
                    f"Netease: no 'lrc' object in response for song_id={song_id}"
                )
                return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)

            lrc: str = lrc_obj.get("lyric", "")
            if not isinstance(lrc, str) or not lrc.strip():
                logger.debug(f"Netease: empty lyrics for song_id={song_id}")
                return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)

            lrcdata = LRCData(lrc)
            # Sync status is derived from the timestamps actually present.
            status = lrcdata.detect_sync_status()
            logger.info(
                f"Netease: got {status.value} lyrics for song_id={song_id} "
                f"({len(lrcdata)} lines)"
            )
            return LyricResult(
                status=status,
                lyrics=lrcdata,
                source=self.source_name,
                confidence=confidence,
            )

        except Exception as e:
            logger.error(f"Netease: lyric fetch failed for song_id={song_id}: {e}")
            return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)

    async def fetch(
        self, track: TrackMeta, bypass_cache: bool = False
    ) -> Optional[LyricResult]:
        """Search, then try ranked candidates in order until one yields lyrics.

        Returns None only when there is no usable metadata at all (the
        fetcher does not apply). A NETWORK_ERROR from any candidate aborts
        immediately; NOT_FOUND moves on to the next candidate, with a short
        delay between attempts to avoid hammering the API.

        Args:
            track: Track metadata to search for.
            bypass_cache: Accepted for interface parity; not consulted here.
        """
        query = f"{track.artist or ''} {track.title or ''}".strip()
        if not query:
            logger.debug("Netease: skipped — insufficient metadata")
            return None

        logger.info(f"Netease: fetching lyrics for {track.display_name()}")
        candidates = await self._search(track)
        if not candidates:
            logger.debug(f"Netease: no match found for {track.display_name()}")
            return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)

        for i, (song_id, confidence) in enumerate(candidates):
            if i > 0:
                # Rate-limit courtesy pause between successive candidates.
                await asyncio.sleep(MULTI_CANDIDATE_DELAY_S)
            result = await self._get_lyric(song_id, confidence=confidence)
            if result is None or result.status == CacheStatus.NETWORK_ERROR:
                return result
            if result.status != CacheStatus.NOT_FOUND:
                return result

        # Every candidate came back empty.
        return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
|
||||
@@ -0,0 +1,175 @@
|
||||
"""
|
||||
Author: Uyanide pywang0608@foxmail.com
|
||||
Date: 2026-03-31 01:54:02
|
||||
Description: QQ Music fetcher via self-hosted API proxy.
|
||||
|
||||
Requires a running qq-music-api instance.
|
||||
The base URL is read from the QQ_MUSIC_API_URL environment variable.
|
||||
|
||||
Search → pick best match → fetch LRC lyrics.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from typing import Optional
|
||||
import httpx
|
||||
from loguru import logger
|
||||
|
||||
from .base import BaseFetcher
|
||||
from .selection import SearchCandidate, select_ranked
|
||||
from ..models import TrackMeta, LyricResult, CacheStatus
|
||||
from ..lrc import LRCData
|
||||
from ..config import (
|
||||
HTTP_TIMEOUT,
|
||||
TTL_NOT_FOUND,
|
||||
TTL_NETWORK_ERROR,
|
||||
MULTI_CANDIDATE_DELAY_S,
|
||||
)
|
||||
|
||||
_QQ_MUSIC_API_SEARCH_ENDPOINT = "/api/search"
|
||||
_QQ_MUSIC_API_LYRIC_ENDPOINT = "/api/lyric"
|
||||
from ..authenticators import QQMusicAuthenticator
|
||||
|
||||
|
||||
class QQMusicFetcher(BaseFetcher):
    """Fetcher backed by a self-hosted qq-music-api proxy.

    The proxy base URL is obtained from the :class:`QQMusicAuthenticator`
    (``await auth.authenticate()``); search and lyric endpoints are appended
    to it. Candidates are ranked with :func:`select_ranked` and tried in
    order until one yields lyrics.
    """

    def __init__(self, auth: QQMusicAuthenticator) -> None:
        # Authenticator resolves the proxy base URL and knows whether the
        # integration is configured at all.
        self.auth = auth

    @property
    def source_name(self) -> str:
        # Registry key / LyricResult.source value for this backend.
        return "qqmusic"

    def is_available(self, track: TrackMeta) -> bool:
        # Requires a title to search for AND a configured proxy URL.
        return bool(track.title) and self.auth.is_configured()

    async def _search(
        self, track: TrackMeta, limit: int = 10
    ) -> list[tuple[str, float]]:
        """Search the proxy and return ranked (mid, confidence) pairs.

        Best match first; "mid" is QQ Music's string track identifier.
        Returns an empty list when the query is empty, the API reports an
        error, nothing matches, or any exception occurs (logged, not raised).
        """
        query = f"{track.artist or ''} {track.title or ''}".strip()
        if not query:
            return []

        logger.debug(f"QQMusic: searching for '{query}' (limit={limit})")

        try:
            async with httpx.AsyncClient(timeout=HTTP_TIMEOUT) as client:
                resp = await client.get(
                    f"{await self.auth.authenticate()}{_QQ_MUSIC_API_SEARCH_ENDPOINT}",
                    params={"keyword": query, "type": "song", "num": limit},
                )
                resp.raise_for_status()
                data = resp.json()

            # The proxy wraps results in {"code": 0, "data": {...}} envelopes.
            if data.get("code") != 0:
                logger.error(f"QQMusic: search API error: {data}")
                return []

            songs = data.get("data", {}).get("list", [])
            if not songs:
                logger.debug("QQMusic: search returned 0 results")
                return []

            logger.debug(f"QQMusic: search returned {len(songs)} candidates")

            # Map raw song dicts to SearchCandidate; entries without a string
            # "mid" are dropped by the walrus filter below. "interval" is the
            # duration in whole seconds, hence the *1000 conversion to ms.
            candidates = [
                SearchCandidate(
                    item=mid,
                    duration_ms=float(song["interval"]) * 1000
                    if isinstance(song.get("interval"), int)
                    else None,
                    title=song.get("name"),
                    artist=", ".join(s.get("name", "") for s in song.get("singer", []))
                    or None,
                    album=(song.get("album") or {}).get("name"),
                )
                for song in songs
                if isinstance(song, dict) and isinstance(mid := song.get("mid"), str)
            ]
            ranked = select_ranked(
                candidates,
                track.length,
                title=track.title,
                artist=track.artist,
                album=track.album,
            )
            if ranked:
                logger.debug(
                    "QQMusic: top candidates: "
                    + ", ".join(f"mid={m} ({c:.0f})" for m, c in ranked)
                )
            else:
                logger.debug("QQMusic: no suitable candidate found")
            return ranked

        except Exception as e:
            logger.error(f"QQMusic: search failed: {e}")
            return []

    async def _get_lyric(
        self, mid: str, confidence: float = 0.0
    ) -> Optional[LyricResult]:
        """Fetch the LRC body for *mid* and wrap it in a LyricResult.

        *confidence* is passed through unchanged into the result. Status is
        detected from the LRC content itself; missing/empty lyrics yield
        NOT_FOUND, API or transport failures yield NETWORK_ERROR.
        """
        logger.debug(f"QQMusic: fetching lyrics for mid={mid}")

        try:
            async with httpx.AsyncClient(timeout=HTTP_TIMEOUT) as client:
                resp = await client.get(
                    f"{await self.auth.authenticate()}{_QQ_MUSIC_API_LYRIC_ENDPOINT}",
                    params={"mid": mid},
                )
                resp.raise_for_status()
                data = resp.json()

            if data.get("code") != 0:
                logger.error(f"QQMusic: lyric API error: {data}")
                return LyricResult(
                    status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR
                )

            lrc = data.get("data", {}).get("lyric", "")
            if not isinstance(lrc, str) or not lrc.strip():
                logger.debug(f"QQMusic: empty lyrics for mid={mid}")
                return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)

            lrcdata = LRCData(lrc)
            # Sync status is derived from the timestamps actually present.
            status = lrcdata.detect_sync_status()
            logger.info(
                f"QQMusic: got {status.value} lyrics for mid={mid} ({len(lrcdata)} lines)"
            )
            return LyricResult(
                status=status,
                lyrics=lrcdata,
                source=self.source_name,
                confidence=confidence,
            )

        except Exception as e:
            logger.error(f"QQMusic: lyric fetch failed for mid={mid}: {e}")
            return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)

    async def fetch(
        self, track: TrackMeta, bypass_cache: bool = False
    ) -> Optional[LyricResult]:
        """Search, then try ranked candidates in order until one yields lyrics.

        Returns None when the fetcher does not apply (proxy unconfigured or
        no usable metadata). A NETWORK_ERROR from any candidate aborts
        immediately; NOT_FOUND moves on to the next candidate, with a short
        delay between attempts.

        Args:
            track: Track metadata to search for.
            bypass_cache: Accepted for interface parity; not consulted here.
        """
        if not self.auth.is_configured():
            logger.debug("QQMusic: skipped — Auth not configured")
            return None

        query = f"{track.artist or ''} {track.title or ''}".strip()
        if not query:
            logger.debug("QQMusic: skipped — insufficient metadata")
            return None

        logger.info(f"QQMusic: fetching lyrics for {track.display_name()}")
        candidates = await self._search(track)
        if not candidates:
            logger.debug(f"QQMusic: no match found for {track.display_name()}")
            return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)

        for i, (mid, confidence) in enumerate(candidates):
            if i > 0:
                # Rate-limit courtesy pause between successive candidates.
                await asyncio.sleep(MULTI_CANDIDATE_DELAY_S)
            result = await self._get_lyric(mid, confidence=confidence)
            if result is None or result.status == CacheStatus.NETWORK_ERROR:
                return result
            if result.status != CacheStatus.NOT_FOUND:
                return result

        # Every candidate came back empty.
        return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
|
||||
@@ -0,0 +1,211 @@
|
||||
"""
|
||||
Author: Uyanide pywang0608@foxmail.com
|
||||
Date: 2026-04-04 11:32:23
|
||||
Description: Shared candidate-selection logic for search-based fetchers.
|
||||
|
||||
Each fetcher maps its API-specific results to SearchCandidate, then calls
|
||||
select_best() which scores candidates by metadata similarity, duration
|
||||
proximity, and sync status.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Generic, Optional, TypeVar
|
||||
|
||||
from ..config import (
|
||||
DURATION_TOLERANCE_MS,
|
||||
MULTI_CANDIDATE_LIMIT,
|
||||
SCORE_W_TITLE as _W_TITLE,
|
||||
SCORE_W_ARTIST as _W_ARTIST,
|
||||
SCORE_W_ALBUM as _W_ALBUM,
|
||||
SCORE_W_DURATION as _W_DURATION,
|
||||
SCORE_W_SYNCED as _W_SYNCED,
|
||||
MIN_CONFIDENCE,
|
||||
)
|
||||
from ..normalize import normalize_for_match, normalize_artist
|
||||
|
||||
T = TypeVar("T")


@dataclass
class SearchCandidate(Generic[T]):
    """One search hit, normalized so the selection code can score any backend.

    The wrapped ``item`` is whatever the originating API uses to identify the
    track (an integer id, a string mid, a raw dict, ...); the remaining fields
    feed metadata-similarity scoring and may be absent.
    """

    # Backend-specific payload handed back to the caller when selected.
    item: T
    # Track length in milliseconds; None when the API did not report one.
    duration_ms: Optional[float] = None
    # True when the backend already guarantees time-synced lyrics.
    is_synced: bool = False
    # Optional metadata compared against the reference track during scoring.
    title: Optional[str] = None
    artist: Optional[str] = None
    album: Optional[str] = None
|
||||
|
||||
|
||||
def _text_similarity(a: str, b: str) -> float:
|
||||
"""Compare two normalized strings. Returns 0.0-1.0."""
|
||||
if a == b:
|
||||
return 1.0
|
||||
if not a or not b:
|
||||
return 0.0
|
||||
# Containment: one is a substring of the other (e.g. "My Love" vs "My Love (Album Version)")
|
||||
if a in b or b in a:
|
||||
return min(len(a), len(b)) / max(len(a), len(b))
|
||||
return 0.0
|
||||
|
||||
|
||||
def _score_candidate(
    c: SearchCandidate[T],
    ref_title: Optional[str],
    ref_artist: Optional[str],
    ref_album: Optional[str],
    ref_length_ms: Optional[int],
) -> float:
    """Score a candidate from 0-100 based on metadata match quality.

    Scoring works in two tiers:

    1. **Metadata score** — each field present on *either* side contributes a
       (weight, match-fraction) pair. Fields missing on both sides are
       excluded entirely (neutral); fields present on only one side keep
       their weight with a 0.0 fraction (penalty for asymmetric absence).
       The weighted average is then rescaled to fill the 0-90 range so that
       missing fields don't inflate the score.

    2. **Synced bonus** — a flat 10 pts, applied independently.

    Field weights (before rescaling): title 40, artist 30, album 10,
    duration 10 (only when the reference track has a duration; hard
    mismatches are pre-filtered before scoring).
    """
    # Each entry: (weight, match fraction in [0, 1]).
    weighted: list[tuple[float, float]] = []

    # Title — similarity on normalized text.
    if ref_title is not None or c.title is not None:
        frac = 0.0
        if ref_title is not None and c.title is not None:
            frac = _text_similarity(
                normalize_for_match(ref_title), normalize_for_match(c.title)
            )
        weighted.append((_W_TITLE, frac))

    # Artist — exact match on artist-specific normalization wins outright,
    # otherwise fall back to generic text similarity.
    if ref_artist is not None or c.artist is not None:
        frac = 0.0
        if ref_artist is not None and c.artist is not None:
            if normalize_artist(ref_artist) == normalize_artist(c.artist):
                frac = 1.0
            else:
                frac = _text_similarity(
                    normalize_for_match(ref_artist), normalize_for_match(c.artist)
                )
        weighted.append((_W_ARTIST, frac))

    # Album — similarity on normalized text.
    if ref_album is not None or c.album is not None:
        frac = 0.0
        if ref_album is not None and c.album is not None:
            frac = _text_similarity(
                normalize_for_match(ref_album), normalize_for_match(c.album)
            )
        weighted.append((_W_ALBUM, frac))

    # Duration — only counted when the reference track has a duration.
    # A candidate without one contributes 0 but still carries the weight
    # (penalty for missing verifiable info); a candidate within tolerance
    # scores linearly with proximity. Hard mismatches are filtered upstream.
    if ref_length_ms is not None:
        frac = 0.0
        if c.duration_ms is not None:
            diff = abs(c.duration_ms - ref_length_ms)
            if diff <= DURATION_TOLERANCE_MS:
                frac = 1.0 - diff / DURATION_TOLERANCE_MS
        weighted.append((_W_DURATION, frac))

    # Rescale the weighted average into the 0-90 metadata range.
    _MAX_METADATA = _W_TITLE + _W_ARTIST + _W_ALBUM + _W_DURATION  # 90
    available_weight = sum(w for w, _ in weighted)
    if available_weight > 0:
        raw = sum(w * f for w, f in weighted)
        metadata_score = (raw / available_weight) * _MAX_METADATA
    else:
        # No comparable fields at all — only the synced bonus can contribute.
        metadata_score = 0.0

    # Synced bonus (flat 10 pts, independent of metadata).
    return metadata_score + (_W_SYNCED if c.is_synced else 0.0)
|
||||
|
||||
|
||||
def select_ranked(
    candidates: list[SearchCandidate[T]],
    track_length_ms: Optional[int] = None,
    *,
    title: Optional[str] = None,
    artist: Optional[str] = None,
    album: Optional[str] = None,
    min_confidence: float = MIN_CONFIDENCE,
    max_results: int = MULTI_CANDIDATE_LIMIT,
) -> list[tuple[T, float]]:
    """Rank candidates by confidence, best first.

    Candidates whose duration hard-mismatches the reference (both durations
    known and the gap exceeds DURATION_TOLERANCE_MS) are dropped, the rest
    are scored, and the top ``max_results`` entries scoring at least
    ``min_confidence`` are returned as (item, score) pairs sorted by score
    descending.
    """

    def _duration_ok(cand: SearchCandidate[T]) -> bool:
        # Only reject when both sides actually have a duration to compare.
        if track_length_ms is None or cand.duration_ms is None:
            return True
        return abs(cand.duration_ms - track_length_ms) <= DURATION_TOLERANCE_MS

    qualifying = [
        (cand.item, score)
        for cand in candidates
        if _duration_ok(cand)
        and (score := _score_candidate(cand, title, artist, album, track_length_ms))
        >= min_confidence
    ]
    qualifying.sort(key=lambda pair: pair[1], reverse=True)
    return qualifying[:max_results]
|
||||
|
||||
|
||||
def select_best(
    candidates: list[SearchCandidate[T]],
    track_length_ms: Optional[int] = None,
    *,
    title: Optional[str] = None,
    artist: Optional[str] = None,
    album: Optional[str] = None,
    min_confidence: float = MIN_CONFIDENCE,
) -> tuple[Optional[T], float]:
    """Pick the best candidate by confidence scoring.

    Candidates whose duration hard-mismatches the reference (both durations
    known and the gap exceeds DURATION_TOLERANCE_MS) are skipped; the rest
    are scored with :func:`_score_candidate` and the highest scorer wins
    (first occurrence wins ties, preserving the API's own ordering).

    Returns:
        (item, score) — item is None when no candidate scores at or above
        ``min_confidence``. The score is always >= 0.0; previously, when
        every candidate was eliminated by the duration filter, the internal
        sentinel -1.0 leaked out as the returned "confidence".
    """
    if not candidates:
        return None, 0.0

    best_item: Optional[T] = None
    best_score = 0.0
    scored_any = False  # distinguishes "no survivors" from a genuine 0.0 score

    for c in candidates:
        # Hard duration filter: both sides have duration but they don't match → skip.
        if (
            track_length_ms is not None
            and c.duration_ms is not None
            and abs(c.duration_ms - track_length_ms) > DURATION_TOLERANCE_MS
        ):
            continue

        s = _score_candidate(c, title, artist, album, track_length_ms)
        # Strictly-greater comparison keeps the earliest candidate on ties.
        if not scored_any or s > best_score:
            scored_any = True
            best_score = s
            best_item = c.item

    if not scored_any or best_score < min_confidence:
        # Below threshold (or nothing survived the filter): report the best
        # score seen for diagnostics, but no item.
        return None, best_score

    return best_item, best_score
|
||||
@@ -0,0 +1,129 @@
|
||||
"""
|
||||
Author: Uyanide pywang0608@foxmail.com
|
||||
Date: 2026-03-25 10:43:21
|
||||
Description: Spotify fetcher — obtains synced lyrics via Spotify's internal color-lyrics API.
|
||||
"""
|
||||
|
||||
import httpx
|
||||
from typing import Optional
|
||||
from loguru import logger
|
||||
|
||||
from .base import BaseFetcher
|
||||
from ..authenticators.spotify import SpotifyAuthenticator, SPOTIFY_BASE_HEADERS
|
||||
from ..models import TrackMeta, LyricResult, CacheStatus
|
||||
from ..lrc import LRCData
|
||||
from ..config import HTTP_TIMEOUT, TTL_NOT_FOUND, TTL_NETWORK_ERROR
|
||||
|
||||
_SPOTIFY_LYRICS_URL = "https://spclient.wg.spotify.com/color-lyrics/v2/track/"
|
||||
|
||||
|
||||
class SpotifyFetcher(BaseFetcher):
    """Fetches lyrics from Spotify's internal color-lyrics endpoint.

    Requires a Spotify trackid in the track metadata and a configured
    :class:`SpotifyAuthenticator` that yields a bearer token. Line
    timestamps from the API are rendered into standard LRC text.
    """

    def __init__(self, auth: SpotifyAuthenticator) -> None:
        self.auth = auth

    @property
    def source_name(self) -> str:
        # Registry key / LyricResult.source value for this backend.
        return "spotify"

    def is_available(self, track: TrackMeta) -> bool:
        # Needs a Spotify track id plus a usable authenticator.
        return bool(track.trackid) and self.auth.is_configured()

    @staticmethod
    def _format_lrc_line(start_ms: int, words: str) -> str:
        """Render one LRC line as ``[mm:ss.cc]words``.

        Fix: the previous implementation rounded the centisecond field in
        isolation (``round((start_ms % 1000) / 10.0)``), which yields 100
        when the sub-second part is >= 995 ms and emitted invalid tags like
        ``[00:59.100]``. Rounding the total centiseconds first and letting
        divmod carry into seconds/minutes keeps every field in range
        (59995 ms now correctly renders as ``[01:00.00]``).
        """
        total_cs = round(start_ms / 10)
        minutes, rem_cs = divmod(total_cs, 6000)
        seconds, centiseconds = divmod(rem_cs, 100)
        return f"[{minutes:02d}:{seconds:02d}.{centiseconds:02d}]{words}"

    @staticmethod
    def _is_truly_synced(lines: list[dict]) -> bool:
        """Return True if at least one line carries a positive start time.

        Guards against responses labeled LINE_SYNCED where every
        startTimeMs is 0 (effectively unsynced). Unparsable values are
        skipped rather than treated as timestamps.
        """
        for line in lines:
            try:
                if int(line.get("startTimeMs", "0")) > 0:
                    return True
            except (ValueError, TypeError):
                continue
        return False

    async def fetch(
        self, track: TrackMeta, bypass_cache: bool = False
    ) -> Optional[LyricResult]:
        """Fetch lyrics for *track* by its Spotify trackid.

        Returns None when the fetcher is not applicable (no trackid);
        otherwise a LyricResult whose status is SUCCESS_SYNCED /
        SUCCESS_UNSYNCED on success, NOT_FOUND when Spotify has no lyrics,
        or NETWORK_ERROR on auth/transport/shape failures.

        Args:
            track: Track metadata; only ``trackid`` is consulted.
            bypass_cache: Accepted for interface parity; not consulted here.
        """
        if not track.trackid:
            logger.debug("Spotify: skipped — no trackid in metadata")
            return None

        logger.info(f"Spotify: fetching lyrics for trackid={track.trackid}")

        token = await self.auth.authenticate()
        if not token:
            logger.error("Spotify: cannot fetch lyrics without a token")
            return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)

        url = f"{_SPOTIFY_LYRICS_URL}{track.trackid}?format=json&vocalRemoval=false&market=from_token"
        headers = {
            "Accept": "application/json",
            "Authorization": f"Bearer {token}",
            **SPOTIFY_BASE_HEADERS,
        }

        try:
            async with httpx.AsyncClient(timeout=HTTP_TIMEOUT) as client:
                res = await client.get(url, headers=headers)

                if res.status_code == 404:
                    # 404 here means the track simply has no lyrics — a
                    # cacheable miss, not an error.
                    logger.debug(f"Spotify: 404 for trackid={track.trackid}")
                    return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)

                if res.status_code != 200:
                    logger.error(f"Spotify: lyrics API returned {res.status_code}")
                    return LyricResult(
                        status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR
                    )

                data = res.json()

            if not isinstance(data, dict) or "lyrics" not in data:
                logger.error("Spotify: unexpected lyrics response structure")
                return LyricResult(
                    status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR
                )

            lyrics_data = data["lyrics"]
            sync_type = lyrics_data.get("syncType", "")
            lines = lyrics_data.get("lines", [])

            if not isinstance(lines, list) or len(lines) == 0:
                logger.debug("Spotify: response contained no lyric lines")
                return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)

            # Trust the synced label only if real timestamps are present.
            is_synced = sync_type == "LINE_SYNCED" and self._is_truly_synced(lines)

            lrc_lines: list[str] = []
            for line in lines:
                words = line.get("words", "")
                if not isinstance(words, str):
                    continue
                try:
                    ms = int(line.get("startTimeMs", "0"))
                except (ValueError, TypeError):
                    ms = 0

                if is_synced:
                    lrc_lines.append(self._format_lrc_line(ms, words))
                else:
                    # Unsynced: emit zero timestamps so the output is still valid LRC.
                    lrc_lines.append(f"[00:00.00]{words}")

            content = LRCData("\n".join(lrc_lines))
            status = (
                CacheStatus.SUCCESS_SYNCED
                if is_synced
                else CacheStatus.SUCCESS_UNSYNCED
            )

            logger.info(f"Spotify: got {status.value} lyrics ({len(lrc_lines)} lines)")
            return LyricResult(status=status, lyrics=content, source=self.source_name)

        except Exception as e:
            logger.error(f"Spotify: lyrics fetch failed: {e}")
            return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)
|
||||
Reference in New Issue
Block a user