finish renaming

This commit is contained in:
2026-03-31 22:34:15 +02:00
parent 4d48e21533
commit 88cf0115af
56 changed files with 35 additions and 35 deletions
+41
View File
@@ -0,0 +1,41 @@
"""
Author: Uyanide pywang0608@foxmail.com
Date: 2026-03-25 02:33:26
Description: Fetcher pipeline — registry and types
"""
from typing import Literal
from .base import BaseFetcher
from .local import LocalFetcher
from .cache_search import CacheSearchFetcher
from .spotify import SpotifyFetcher
from .lrclib import LrclibFetcher
from .lrclib_search import LrclibSearchFetcher
from .netease import NeteaseFetcher
from .qqmusic import QQMusicFetcher
from ..cache import CacheEngine
# Literal union of every supported lyric-source name. These strings are the
# keys of the registry built by create_fetchers() below.
FetcherMethodType = Literal[
    "local",
    "cache-search",
    "spotify",
    "lrclib",
    "lrclib-search",
    "netease",
    "qqmusic",
]
def create_fetchers(cache: CacheEngine) -> dict[FetcherMethodType, BaseFetcher]:
    """Build the full fetcher registry, keyed by source name.

    Every known source is instantiated eagerly; only the cache-search
    fetcher needs access to the shared cache engine.
    """
    registry: dict[FetcherMethodType, BaseFetcher] = {}
    registry["local"] = LocalFetcher()
    registry["cache-search"] = CacheSearchFetcher(cache)
    registry["spotify"] = SpotifyFetcher()
    registry["lrclib"] = LrclibFetcher()
    registry["lrclib-search"] = LrclibSearchFetcher()
    registry["netease"] = NeteaseFetcher()
    registry["qqmusic"] = QQMusicFetcher()
    return registry
+35
View File
@@ -0,0 +1,35 @@
"""
Author: Uyanide pywang0608@foxmail.com
Date: 2026-03-25 02:33:26
Description: Base fetcher class and common interfaces
"""
from abc import ABC, abstractmethod
from typing import Optional
from ..models import TrackMeta, LyricResult
class BaseFetcher(ABC):
    """Common interface implemented by every lyric fetcher."""

    @property
    @abstractmethod
    def source_name(self) -> str:
        """Short identifier of this lyric source."""
        ...

    @property
    def self_cached(self) -> bool:
        """Whether this fetcher manages its own cache.

        When True the pipeline skips the per-source cache check.
        Defaults to False; self-caching fetchers override this.
        """
        return False

    @abstractmethod
    def is_available(self, track: TrackMeta) -> bool:
        """Return True when this fetcher can be attempted for *track*
        (e.g. the metadata it requires is present)."""
        ...

    @abstractmethod
    def fetch(
        self, track: TrackMeta, bypass_cache: bool = False
    ) -> Optional[LyricResult]:
        """Try to fetch lyrics for *track*; return None when unable to."""
        ...
+85
View File
@@ -0,0 +1,85 @@
"""
Author: Uyanide pywang0608@foxmail.com
Date: 2026-03-28 05:57:46
Description: Cache-search fetcher — cross-album fuzzy lookup in the local cache
"""
"""
Searches existing cache entries by artist + title with fuzzy normalization,
ignoring album and source. Useful when the same track appears on different
albums or is played from different players.
"""
from typing import Optional
from loguru import logger
from .base import BaseFetcher
from ..models import TrackMeta, LyricResult, CacheStatus
from ..cache import CacheEngine
class CacheSearchFetcher(BaseFetcher):
    """Searches existing cache entries across albums and sources.

    Fast path: exact artist+title+album lookup.
    Slow path: fuzzy search by artist/title/length, ignoring album/source.
    """

    def __init__(self, cache: CacheEngine) -> None:
        self._cache = cache

    @property
    def source_name(self) -> str:
        return "cache-search"

    @property
    def self_cached(self) -> bool:
        # Results come straight from the cache; an extra per-source cache
        # layer on top would be redundant.
        return True

    def is_available(self, track: TrackMeta) -> bool:
        return bool(track.title)

    def fetch(
        self, track: TrackMeta, bypass_cache: bool = False
    ) -> Optional[LyricResult]:
        """Look up lyrics in the local cache.

        Returns None when bypassed, when the track has no title, or when
        no cached entry with usable lyrics exists.
        """
        if bypass_cache:
            logger.debug("Cache-search: bypassed by caller")
            return None
        if not track.title:
            logger.debug("Cache-search: skipped — no title")
            return None
        # Fast path: exact metadata match (artist+title+album), single SQL query
        exact = self._cache.find_best_positive(track)
        if exact:
            logger.info(f"Cache-search: exact hit ({exact.status.value})")
            return exact
        # Slow path: fuzzy cross-album search
        matches = self._cache.search_by_meta(
            artist=track.artist,
            title=track.title,
            length=track.length,
        )
        if not matches:
            logger.debug(f"Cache-search: no match for {track.display_name()}")
            return None
        # Pick best: prefer a synced entry, otherwise the first entry that
        # actually carries lyrics. (Fix: previously the first match was taken
        # unconditionally, so a lyric-less row could shadow a usable one and
        # cause a spurious miss.)
        best = None
        for m in matches:
            if not m.get("lyrics"):
                continue  # skip entries without lyric text
            if m.get("status") == CacheStatus.SUCCESS_SYNCED.value:
                best = m
                break
            if best is None:
                best = m
        if best is None:
            return None
        status = CacheStatus(best["status"])
        logger.info(
            f"Cache-search: fuzzy hit from [{best.get('source')}] "
            f"album={best.get('album')!r} ({status.value})"
        )
        return LyricResult(
            status=status,
            lyrics=best["lyrics"],
            source=self.source_name,
        )
+98
View File
@@ -0,0 +1,98 @@
"""
Author: Uyanide pywang0608@foxmail.com
Date: 2026-03-26 02:08:41
Description: Local fetcher — reads lyrics from .lrc sidecar files or embedded audio metadata
"""
"""
Priority:
1. Same-directory .lrc file (e.g. /path/to/track.lrc)
2. Embedded lyrics in audio metadata (FLAC, MP3 USLT/SYLT tags)
"""
from typing import Optional
from loguru import logger
from mutagen._file import File
from mutagen.flac import FLAC
from .base import BaseFetcher
from ..models import TrackMeta, LyricResult
from ..lrc import detect_sync_status, normalize_tags, get_audio_path, get_sidecar_path
class LocalFetcher(BaseFetcher):
    """Reads lyrics from the local filesystem for locally-stored tracks.

    Order of preference (see module docstring): a same-directory ``.lrc``
    sidecar file first, then lyrics embedded in the audio file's metadata.
    """

    @property
    def source_name(self) -> str:
        return "local"

    def is_available(self, track: TrackMeta) -> bool:
        # Only applicable to tracks backed by a local file.
        return track.is_local

    def fetch(
        self, track: TrackMeta, bypass_cache: bool = False
    ) -> Optional[LyricResult]:
        """Attempt to read lyrics from local filesystem.

        Returns a LyricResult on success, or None when the track is not a
        local file or no lyrics could be found/read. ``bypass_cache`` is
        ignored — local reads are always fresh.
        """
        if not track.is_local or not track.url:
            return None
        # Resolve the URL to a path without requiring the audio file to
        # exist yet — a sidecar .lrc may still be present on its own.
        audio_path = get_audio_path(track.url, ensure_exists=False)
        if not audio_path:
            logger.debug(f"Local: audio URL is not a valid file path: {track.url}")
            return None
        # Sidecar lookup: only returns a path when the .lrc file exists.
        lrc_path = get_sidecar_path(
            track.url, ensure_audio_exists=False, ensure_exists=True
        )
        if lrc_path:
            try:
                with open(lrc_path, "r", encoding="utf-8") as f:
                    content = f.read().strip()
                    if content:
                        content = normalize_tags(content)
                        status = detect_sync_status(content)
                        logger.info(f"Local: found .lrc sidecar ({status.value})")
                        return LyricResult(
                            status=status, lyrics=content, source=self.source_name
                        )
            except Exception as e:
                # Read/decoding failure: log and fall through to embedded tags.
                logger.error(f"Local: error reading {lrc_path}: {e}")
        else:
            logger.debug(f"Local: no .lrc sidecar found for {audio_path}")
        # Embedded metadata — from here on the audio file itself must exist.
        if not audio_path.exists():
            logger.debug(f"Local: audio file does not exist: {audio_path}")
            return None
        try:
            audio = File(audio_path)
            if audio is not None:
                lyrics = None
                if isinstance(audio, FLAC):
                    # FLAC stores lyrics in vorbis comment tags; values are
                    # lists of strings, so take the first entry (or None).
                    lyrics = (
                        audio.get("lyrics") or audio.get("unsynclyrics") or [None]
                    )[0]
                elif hasattr(audio, "tags") and audio.tags:
                    # MP3 / other: look for USLT or SYLT ID3 frames
                    for key in audio.tags.keys():
                        if key.startswith("USLT") or key.startswith("SYLT"):
                            lyrics = str(audio.tags[key])
                            break
                if lyrics:
                    lyrics = normalize_tags(lyrics.strip())
                    status = detect_sync_status(lyrics)
                    logger.info(f"Local: found embedded lyrics ({status.value})")
                    return LyricResult(
                        status=status,
                        lyrics=lyrics,
                        source=f"{self.source_name} (embedded)",
                    )
                else:
                    logger.debug("Local: no embedded lyrics found")
        except Exception as e:
            logger.error(f"Local: error reading metadata for {audio_path}: {e}")
        logger.debug(f"Local: no lyrics found for {audio_path}")
        return None
+111
View File
@@ -0,0 +1,111 @@
"""
Author: Uyanide pywang0608@foxmail.com
Date: 2026-03-25 05:23:38
Description: LRCLIB fetcher — queries lrclib.net for synced/plain lyrics
"""
"""
Requires complete track metadata (artist, title, album, duration).
"""
from typing import Optional
import httpx
from loguru import logger
from urllib.parse import urlencode
from .base import BaseFetcher
from ..models import TrackMeta, LyricResult, CacheStatus
from ..lrc import normalize_tags
from ..config import (
HTTP_TIMEOUT,
TTL_UNSYNCED,
TTL_NOT_FOUND,
TTL_NETWORK_ERROR,
LRCLIB_API_URL,
UA_LRX,
)
class LrclibFetcher(BaseFetcher):
    """Queries the LRCLIB 'get' endpoint for an exact-metadata match.

    Only usable when the track carries complete metadata; synced lyrics
    are preferred over plain text when both are returned.
    """

    @property
    def source_name(self) -> str:
        return "lrclib"

    def is_available(self, track: TrackMeta) -> bool:
        return track.is_complete

    def fetch(
        self, track: TrackMeta, bypass_cache: bool = False
    ) -> Optional[LyricResult]:
        """Fetch lyrics from LRCLIB. Requires complete metadata."""
        if not track.is_complete:
            logger.debug("LRCLIB: skipped — incomplete metadata")
            return None
        # The API expects the duration in seconds.
        duration_s = track.length / 1000.0 if track.length else 0
        query = urlencode(
            {
                "track_name": track.title,
                "artist_name": track.artist,
                "album_name": track.album,
                "duration": duration_s,
            }
        )
        logger.info(f"LRCLIB: fetching lyrics for {track.display_name()}")
        try:
            with httpx.Client(timeout=HTTP_TIMEOUT) as client:
                resp = client.get(
                    f"{LRCLIB_API_URL}?{query}", headers={"User-Agent": UA_LRX}
                )
                if resp.status_code == 404:
                    logger.debug(f"LRCLIB: not found for {track.display_name()}")
                    return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
                if resp.status_code != 200:
                    logger.error(f"LRCLIB: API returned {resp.status_code}")
                    return LyricResult(
                        status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR
                    )
                payload = resp.json()
                if not isinstance(payload, dict):
                    logger.error(f"LRCLIB: unexpected response type: {type(payload).__name__}")
                    return LyricResult(
                        status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR
                    )
                raw_synced = payload.get("syncedLyrics")
                raw_plain = payload.get("plainLyrics")
                # Synced lyrics win when present and non-empty.
                if isinstance(raw_synced, str) and raw_synced.strip():
                    lyrics = normalize_tags(raw_synced.strip())
                    logger.info(
                        f"LRCLIB: got synced lyrics ({len(lyrics.splitlines())} lines)"
                    )
                    return LyricResult(
                        status=CacheStatus.SUCCESS_SYNCED,
                        lyrics=lyrics,
                        source=self.source_name,
                    )
                if isinstance(raw_plain, str) and raw_plain.strip():
                    lyrics = normalize_tags(raw_plain.strip())
                    logger.info(
                        f"LRCLIB: got unsynced lyrics ({len(lyrics.splitlines())} lines)"
                    )
                    return LyricResult(
                        status=CacheStatus.SUCCESS_UNSYNCED,
                        lyrics=lyrics,
                        source=self.source_name,
                        ttl=TTL_UNSYNCED,
                    )
                logger.debug(f"LRCLIB: empty response for {track.display_name()}")
                return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
        except httpx.HTTPError as e:
            logger.error(f"LRCLIB: HTTP error: {e}")
            return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)
        except Exception as e:
            logger.error(f"LRCLIB: unexpected error: {e}")
            return None
+168
View File
@@ -0,0 +1,168 @@
"""
Author: Uyanide pywang0608@foxmail.com
Date: 2026-03-25 05:30:50
Description: LRCLIB search fetcher — fuzzy search via lrclib.net /api/search
"""
"""
Used when metadata is incomplete (no album or duration) but title is available.
Selects the best match by duration when track length is known.
"""
import httpx
from typing import Optional
from loguru import logger
from urllib.parse import urlencode
from .base import BaseFetcher
from ..models import TrackMeta, LyricResult, CacheStatus
from ..lrc import normalize_tags
from ..config import (
HTTP_TIMEOUT,
TTL_UNSYNCED,
TTL_NOT_FOUND,
TTL_NETWORK_ERROR,
DURATION_TOLERANCE_MS,
LRCLIB_SEARCH_URL,
UA_LRX,
)
class LrclibSearchFetcher(BaseFetcher):
    """Fuzzy-search fetcher for LRCLIB's search endpoint.

    Used when metadata is incomplete; needs only a title. When the track
    length is known, candidates are ranked by duration difference.
    """

    @property
    def source_name(self) -> str:
        return "lrclib-search"

    def is_available(self, track: TrackMeta) -> bool:
        return bool(track.title)

    def fetch(
        self, track: TrackMeta, bypass_cache: bool = False
    ) -> Optional[LyricResult]:
        """Search LRCLIB for lyrics. Requires at least a title.

        Returns a LyricResult with lyrics, a NOT_FOUND/NETWORK_ERROR status
        result, or None on unexpected errors.
        """
        if not track.title:
            logger.debug("LRCLIB-search: skipped — no title")
            return None
        # Build the query from whatever metadata is present.
        params: dict[str, str] = {"track_name": track.title}
        if track.artist:
            params["artist_name"] = track.artist
        if track.album:
            params["album_name"] = track.album
        url = f"{LRCLIB_SEARCH_URL}?{urlencode(params)}"
        logger.info(f"LRCLIB-search: searching for {track.display_name()}")
        try:
            with httpx.Client(timeout=HTTP_TIMEOUT) as client:
                resp = client.get(url, headers={"User-Agent": UA_LRX})
                if resp.status_code != 200:
                    logger.error(f"LRCLIB-search: API returned {resp.status_code}")
                    return LyricResult(
                        status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR
                    )
                data = resp.json()
                if not isinstance(data, list) or len(data) == 0:
                    logger.debug(f"LRCLIB-search: no results for {track.display_name()}")
                    return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
                logger.debug(f"LRCLIB-search: got {len(data)} candidates")
                # Select best match by duration
                best = self._select_best(data, track)
                if best is None:
                    logger.debug("LRCLIB-search: no valid candidate found")
                    return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
                # Extract lyrics: synced preferred, plain as fallback.
                synced = best.get("syncedLyrics")
                unsynced = best.get("plainLyrics")
                if isinstance(synced, str) and synced.strip():
                    lyrics = normalize_tags(synced.strip())
                    logger.info(
                        f"LRCLIB-search: got synced lyrics ({len(lyrics.splitlines())} lines)"
                    )
                    return LyricResult(
                        status=CacheStatus.SUCCESS_SYNCED,
                        lyrics=lyrics,
                        source=self.source_name,
                    )
                elif isinstance(unsynced, str) and unsynced.strip():
                    lyrics = normalize_tags(unsynced.strip())
                    logger.info(
                        f"LRCLIB-search: got unsynced lyrics ({len(lyrics.splitlines())} lines)"
                    )
                    return LyricResult(
                        status=CacheStatus.SUCCESS_UNSYNCED,
                        lyrics=lyrics,
                        source=self.source_name,
                        ttl=TTL_UNSYNCED,
                    )
                else:
                    logger.debug("LRCLIB-search: best candidate has empty lyrics")
                    return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
        except httpx.HTTPError as e:
            logger.error(f"LRCLIB-search: HTTP error: {e}")
            return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)
        except Exception as e:
            logger.error(f"LRCLIB-search: unexpected error: {e}")
            return None

    @staticmethod
    def _select_best(candidates: list[dict], track: TrackMeta) -> Optional[dict]:
        """Pick the best candidate, preferring synced lyrics and closest duration."""
        if track.length is not None:
            # Candidate 'duration' fields are in seconds; track.length is ms.
            track_s = track.length / 1000.0
            best: Optional[dict] = None
            best_diff = float("inf")
            for item in candidates:
                if not isinstance(item, dict):
                    continue
                duration = item.get("duration")
                if not isinstance(duration, (int, float)):
                    continue
                diff = abs(duration - track_s) * 1000  # compare in ms
                if diff > DURATION_TOLERANCE_MS:
                    continue
                # Prefer synced over unsynced at similar duration
                has_synced = (
                    isinstance(item.get("syncedLyrics"), str)
                    and item["syncedLyrics"].strip()
                )
                best_synced = (
                    best is not None
                    and isinstance(best.get("syncedLyrics"), str)
                    and best["syncedLyrics"].strip()
                )
                # Replace on strictly closer duration, or on a tie when the
                # new candidate has synced lyrics and the current best does not.
                if diff < best_diff or (
                    diff == best_diff and has_synced and not best_synced
                ):
                    best_diff = diff
                    best = item
            if best is not None:
                logger.debug(
                    f"LRCLIB-search: selected id={best.get('id')} (diff={best_diff:.0f}ms)"
                )
                return best
            logger.debug(
                f"LRCLIB-search: no candidate within {DURATION_TOLERANCE_MS}ms"
            )
            return None
        # No duration — pick first with synced lyrics, or just first
        for item in candidates:
            if (
                isinstance(item, dict)
                and isinstance(item.get("syncedLyrics"), str)
                and item["syncedLyrics"].strip()
            ):
                return item
        return candidates[0] if isinstance(candidates[0], dict) else None
+213
View File
@@ -0,0 +1,213 @@
"""
Author: Uyanide pywang0608@foxmail.com
Date: 2026-03-25 11:04:51
Description: Netease Cloud Music fetcher
"""
"""
Uses the public cloudsearch API for searching and the song/lyric API for
retrieving lyrics. No authentication required.
Search results are filtered by duration when the track has a known length
to avoid returning lyrics for the wrong version of a song.
"""
from typing import Optional
import httpx
from loguru import logger
from .base import BaseFetcher
from ..models import TrackMeta, LyricResult, CacheStatus
from ..lrc import detect_sync_status, normalize_tags
from ..config import (
HTTP_TIMEOUT,
TTL_NOT_FOUND,
TTL_NETWORK_ERROR,
DURATION_TOLERANCE_MS,
NETEASE_SEARCH_URL,
NETEASE_LYRIC_URL,
UA_BROWSER,
)
# Request headers sent with every Netease API call: a browser User-Agent
# plus a music.163.com Referer.
_HEADERS = {
    "User-Agent": UA_BROWSER,
    "Referer": "https://music.163.com/",
}
class NeteaseFetcher(BaseFetcher):
    """Fetcher for Netease Cloud Music's public search + lyric endpoints.

    See the module docstring: no authentication is used, and search hits
    are filtered by duration when the track length is known.
    """

    @property
    def source_name(self) -> str:
        return "netease"

    def is_available(self, track: TrackMeta) -> bool:
        # A title alone is enough to attempt a search.
        return bool(track.title)

    def _search(self, track: TrackMeta, limit: int = 10) -> Optional[int]:
        """Search Netease and return the best-matching song ID.
        When ``track.length`` is available, candidates are ranked by duration
        difference and only accepted if within ``DURATION_TOLERANCE_MS``.
        Returns None when nothing matches or on any request error.
        """
        query = f"{track.artist or ''} {track.title or ''}".strip()
        if not query:
            return None
        logger.debug(f"Netease: searching for '{query}' (limit={limit})")
        try:
            with httpx.Client(timeout=HTTP_TIMEOUT) as client:
                resp = client.post(
                    NETEASE_SEARCH_URL,
                    headers=_HEADERS,
                    data={"s": query, "type": "1", "limit": str(limit), "offset": "0"},
                )
                resp.raise_for_status()
                result = resp.json()
                # Validate response
                if not isinstance(result, dict):
                    logger.error(
                        f"Netease: search returned non-dict: {type(result).__name__}"
                    )
                    return None
                result_body = result.get("result")
                if not isinstance(result_body, dict):
                    logger.debug("Netease: search 'result' field missing or invalid")
                    return None
                songs = result_body.get("songs")
                if not isinstance(songs, list) or len(songs) == 0:
                    logger.debug("Netease: search returned 0 results")
                    return None
                logger.debug(f"Netease: search returned {len(songs)} candidates")
                # Duration-based best-match selection
                if track.length is not None:
                    track_ms = track.length
                    best_id: Optional[int] = None
                    best_diff = float("inf")
                    for song in songs:
                        if not isinstance(song, dict):
                            continue
                        sid = song.get("id")
                        name = song.get("name", "?")
                        duration = song.get("dt")  # milliseconds
                        if not isinstance(duration, int):
                            logger.debug(
                                f" candidate {sid} '{name}': no duration, skipped"
                            )
                            continue
                        diff = abs(duration - track_ms)
                        logger.debug(
                            f" candidate {sid} '{name}': "
                            f"duration={duration}ms, diff={diff}ms"
                        )
                        if diff < best_diff:
                            best_diff = diff
                            best_id = sid
                    # Accept the closest candidate only when within tolerance.
                    if best_id is not None and best_diff <= DURATION_TOLERANCE_MS:
                        logger.debug(f"Netease: selected id={best_id} (diff={best_diff}ms)")
                        return best_id
                    logger.debug(
                        f"Netease: no candidate within {DURATION_TOLERANCE_MS}ms "
                        f"(best diff={best_diff}ms)"
                    )
                    return None
                # No duration info — take the first result
                first = songs[0]
                if not isinstance(first, dict) or "id" not in first:
                    logger.error("Netease: first search result has no 'id'")
                    return None
                logger.debug(
                    f"Netease: no duration available, using first result "
                    f"id={first['id']} '{first.get('name', '?')}'"
                )
                return first["id"]
        except Exception as e:
            logger.error(f"Netease: search failed: {e}")
            return None

    def _get_lyric(self, song_id: int) -> Optional[LyricResult]:
        """Fetch lyrics for a given Netease song ID."""
        logger.debug(f"Netease: fetching lyrics for song_id={song_id}")
        try:
            with httpx.Client(timeout=HTTP_TIMEOUT) as client:
                # NOTE(review): these form flags mirror the web client's lyric
                # request; their individual semantics are unverified here.
                resp = client.post(
                    NETEASE_LYRIC_URL,
                    headers=_HEADERS,
                    data={
                        "id": str(song_id),
                        "cp": "false",
                        "tv": "0",
                        "lv": "0",
                        "rv": "0",
                        "kv": "0",
                        "yv": "0",
                        "ytv": "0",
                        "yrv": "0",
                    },
                )
                resp.raise_for_status()
                data = resp.json()
                # Validate response
                if not isinstance(data, dict):
                    logger.error(
                        f"Netease: lyric response is not dict: {type(data).__name__}"
                    )
                    return LyricResult(
                        status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR
                    )
                lrc_obj = data.get("lrc")
                if not isinstance(lrc_obj, dict):
                    logger.debug(
                        f"Netease: no 'lrc' object in response for song_id={song_id}"
                    )
                    return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
                lrc: str = lrc_obj.get("lyric", "")
                if not isinstance(lrc, str) or not lrc.strip():
                    logger.debug(f"Netease: empty lyrics for song_id={song_id}")
                    return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
                # Determine sync status
                lrc = normalize_tags(lrc)
                status = detect_sync_status(lrc)
                logger.info(
                    f"Netease: got {status.value} lyrics for song_id={song_id} "
                    f"({len(lrc.splitlines())} lines)"
                )
                return LyricResult(
                    status=status, lyrics=lrc.strip(), source=self.source_name
                )
        except Exception as e:
            logger.error(f"Netease: lyric fetch failed for song_id={song_id}: {e}")
            return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)

    def fetch(
        self, track: TrackMeta, bypass_cache: bool = False
    ) -> Optional[LyricResult]:
        """Search for the track and fetch its lyrics."""
        query = f"{track.artist or ''} {track.title or ''}".strip()
        if not query:
            logger.debug("Netease: skipped — insufficient metadata")
            return None
        logger.info(f"Netease: fetching lyrics for {track.display_name()}")
        song_id = self._search(track)
        if not song_id:
            # NOTE(review): _search also returns None on request errors, so a
            # transient network failure surfaces as NOT_FOUND (with
            # TTL_NOT_FOUND) here — confirm this is intended.
            logger.debug(f"Netease: no match found for {track.display_name()}")
            return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
        return self._get_lyric(song_id)
+178
View File
@@ -0,0 +1,178 @@
"""
Author: Uyanide pywang0608@foxmail.com
Date: 2026-03-31 01:54:02
Description: QQ Music fetcher via self-hosted API proxy
"""
"""
Requires a running qq-music-api instance.
The base URL is read from the QQ_MUSIC_API_URL environment variable.
Search → pick best match by duration → fetch LRC lyrics.
"""
from typing import Optional
import httpx
from loguru import logger
from .base import BaseFetcher
from ..models import TrackMeta, LyricResult, CacheStatus
from ..lrc import detect_sync_status, normalize_tags
from ..config import (
HTTP_TIMEOUT,
TTL_NOT_FOUND,
TTL_NETWORK_ERROR,
DURATION_TOLERANCE_MS,
QQ_MUSIC_API_URL,
)
class QQMusicFetcher(BaseFetcher):
    """Fetcher for QQ Music via a self-hosted qq-music-api proxy.

    Requires QQ_MUSIC_API_URL to point at a running proxy instance (see
    module docstring). Search results are filtered by duration when the
    track length is known.
    """

    @property
    def source_name(self) -> str:
        return "qqmusic"

    def is_available(self, track: TrackMeta) -> bool:
        # Needs both a title to search for and a configured proxy URL.
        return bool(track.title) and bool(QQ_MUSIC_API_URL)

    def _search(self, track: TrackMeta, limit: int = 10) -> Optional[str]:
        """Search QQ Music and return the best-matching song MID.

        Returns None when nothing matches or on any request error.
        """
        query = f"{track.artist or ''} {track.title or ''}".strip()
        if not query:
            return None
        logger.debug(f"QQMusic: searching for '{query}' (limit={limit})")
        try:
            with httpx.Client(timeout=HTTP_TIMEOUT) as client:
                resp = client.get(
                    f"{QQ_MUSIC_API_URL}/api/search",
                    params={"keyword": query, "type": "song", "num": limit},
                )
                resp.raise_for_status()
                data = resp.json()
                # A non-zero 'code' is treated as an API error.
                if data.get("code") != 0:
                    logger.error(f"QQMusic: search API error: {data}")
                    return None
                songs = data.get("data", {}).get("list", [])
                if not songs:
                    logger.debug("QQMusic: search returned 0 results")
                    return None
                logger.debug(f"QQMusic: search returned {len(songs)} candidates")
                # Duration-based best-match selection
                if track.length is not None:
                    track_ms = track.length
                    best_mid: Optional[str] = None
                    best_diff = float("inf")
                    for song in songs:
                        if not isinstance(song, dict):
                            continue
                        mid = song.get("mid")
                        name = song.get("name", "?")
                        # interval is in seconds
                        interval = song.get("interval")
                        if not isinstance(interval, int):
                            logger.debug(
                                f" candidate {mid} '{name}': no duration, skipped"
                            )
                            continue
                        duration_ms = interval * 1000
                        diff = abs(duration_ms - track_ms)
                        logger.debug(
                            f" candidate {mid} '{name}': "
                            f"duration={duration_ms}ms, diff={diff}ms"
                        )
                        if diff < best_diff:
                            best_diff = diff
                            best_mid = mid
                    # Accept the closest candidate only when within tolerance.
                    if best_mid is not None and best_diff <= DURATION_TOLERANCE_MS:
                        logger.debug(
                            f"QQMusic: selected mid={best_mid} (diff={best_diff}ms)"
                        )
                        return best_mid
                    logger.debug(
                        f"QQMusic: no candidate within {DURATION_TOLERANCE_MS}ms "
                        f"(best diff={best_diff}ms)"
                    )
                    return None
                # No duration info — take the first result
                first = songs[0]
                if not isinstance(first, dict) or "mid" not in first:
                    logger.error("QQMusic: first search result has no 'mid'")
                    return None
                logger.debug(
                    f"QQMusic: no duration available, using first result "
                    f"mid={first['mid']} '{first.get('name', '?')}'"
                )
                return first["mid"]
        except Exception as e:
            logger.error(f"QQMusic: search failed: {e}")
            return None

    def _get_lyric(self, mid: str) -> Optional[LyricResult]:
        """Fetch lyrics for a given QQ Music song MID."""
        logger.debug(f"QQMusic: fetching lyrics for mid={mid}")
        try:
            with httpx.Client(timeout=HTTP_TIMEOUT) as client:
                resp = client.get(
                    f"{QQ_MUSIC_API_URL}/api/lyric",
                    params={"mid": mid},
                )
                resp.raise_for_status()
                data = resp.json()
                # A non-zero 'code' is treated as an API error.
                if data.get("code") != 0:
                    logger.error(f"QQMusic: lyric API error: {data}")
                    return LyricResult(
                        status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR
                    )
                lrc = data.get("data", {}).get("lyric", "")
                if not isinstance(lrc, str) or not lrc.strip():
                    logger.debug(f"QQMusic: empty lyrics for mid={mid}")
                    return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
                # Normalize tags, then classify the content as synced/unsynced.
                lrc = normalize_tags(lrc)
                status = detect_sync_status(lrc)
                logger.info(
                    f"QQMusic: got {status.value} lyrics for mid={mid} "
                    f"({len(lrc.splitlines())} lines)"
                )
                return LyricResult(
                    status=status, lyrics=lrc.strip(), source=self.source_name
                )
        except Exception as e:
            logger.error(f"QQMusic: lyric fetch failed for mid={mid}: {e}")
            return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)

    def fetch(
        self, track: TrackMeta, bypass_cache: bool = False
    ) -> Optional[LyricResult]:
        """Search for the track and fetch its lyrics."""
        if not QQ_MUSIC_API_URL:
            logger.debug("QQMusic: skipped — QQ_MUSIC_API_URL not configured")
            return None
        query = f"{track.artist or ''} {track.title or ''}".strip()
        if not query:
            logger.debug("QQMusic: skipped — insufficient metadata")
            return None
        logger.info(f"QQMusic: fetching lyrics for {track.display_name()}")
        mid = self._search(track)
        if not mid:
            # NOTE(review): _search also returns None on request errors, so a
            # transient failure surfaces as NOT_FOUND here — confirm intended.
            logger.debug(f"QQMusic: no match found for {track.display_name()}")
            return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
        return self._get_lyric(mid)
+373
View File
@@ -0,0 +1,373 @@
"""
Author: Uyanide pywang0608@foxmail.com
Date: 2026-03-25 10:43:21
Description: Spotify fetcher — obtains synced lyrics via Spotify's internal color-lyrics API.
"""
"""
Authentication flow:
1. Fetch server time from Spotify
2. Fetch TOTP secret
3. Generate a TOTP code and exchange it (with SP_DC cookie) for an access token
4. Request lyrics using the access token
The secret and token are cached on the instance to avoid redundant network
calls within the same session.
Requires SPOTIFY_SP_DC environment variable to be set.
"""
import httpx
import json
import time
import struct
import hmac
import hashlib
from typing import Optional, Tuple
from loguru import logger
from .base import BaseFetcher
from ..models import TrackMeta, LyricResult, CacheStatus
from ..lrc import normalize_tags
from ..config import (
HTTP_TIMEOUT,
SPOTIFY_APP_VERSION,
TTL_NOT_FOUND,
TTL_NETWORK_ERROR,
SPOTIFY_TOKEN_URL,
SPOTIFY_LYRICS_URL,
SPOTIFY_SERVER_TIME_URL,
SPOTIFY_SECRET_URL,
SPOTIFY_SP_DC,
SPOTIFY_TOKEN_CACHE_FILE,
UA_BROWSER,
)
class SpotifyFetcher(BaseFetcher):
    """Fetches lyrics from Spotify's internal color-lyrics API.

    See the module docstring for the TOTP-based authentication flow.
    Requires the SPOTIFY_SP_DC environment variable (sp_dc cookie) and a
    Spotify track ID on the track metadata.
    """

    def __init__(self) -> None:
        # Session-level caches to avoid refetching within the same run
        self._cached_secret: Optional[Tuple[str, int]] = None
        self._cached_token: Optional[str] = None
        self._token_expires_at: float = 0.0  # epoch seconds

    @property
    def source_name(self) -> str:
        return "spotify"

    def is_available(self, track: TrackMeta) -> bool:
        # Needs a Spotify track ID and the SP_DC cookie for authentication.
        return bool(track.trackid) and bool(SPOTIFY_SP_DC)

    # ─── Auth helpers ────────────────────────────────────────────────
    def _get_server_time(self, client: httpx.Client) -> Optional[int]:
        """Fetch Spotify's server timestamp (seconds since epoch).

        The server clock is used for the TOTP counter so local clock drift
        does not invalidate the code. Returns None on any failure.
        """
        try:
            res = client.get(SPOTIFY_SERVER_TIME_URL, timeout=HTTP_TIMEOUT)
            res.raise_for_status()
            data = res.json()
            if not isinstance(data, dict) or "serverTime" not in data:
                logger.error(f"Spotify: unexpected server-time response: {data}")
                return None
            server_time = data["serverTime"]
            logger.debug(f"Spotify: server time = {server_time}")
            return server_time
        except Exception as e:
            logger.error(f"Spotify: failed to fetch server time: {e}")
            return None

    def _get_secret(self, client: httpx.Client) -> Optional[Tuple[str, int]]:
        """Fetch and decode the TOTP secret. Cached after first success.
        Response format: [{version: int, secret: str}, ...]
        Each character in *secret* is XOR-decoded with ``(index % 33) + 9``.
        """
        if self._cached_secret is not None:
            logger.debug("Spotify: using cached TOTP secret")
            return self._cached_secret
        try:
            res = client.get(SPOTIFY_SECRET_URL, timeout=HTTP_TIMEOUT)
            res.raise_for_status()
            data = res.json()
            if not isinstance(data, list) or len(data) == 0:
                logger.error(
                    f"Spotify: unexpected secrets response (type={type(data).__name__}, len={len(data) if isinstance(data, list) else '?'})"
                )
                return None
            # Use the last entry (presumably the newest version — verify).
            last = data[-1]
            if "secret" not in last or "version" not in last:
                logger.error(f"Spotify: malformed secret entry: {list(last.keys())}")
                return None
            secret_raw = last["secret"]
            version = last["version"]
            # XOR decode: each char is XORed with (index % 33) + 9 and the
            # resulting numbers are concatenated as a decimal-digit string.
            parts = []
            for i, char in enumerate(secret_raw):
                parts.append(str(ord(char) ^ ((i % 33) + 9)))
            secret = "".join(parts)
            logger.debug(f"Spotify: decoded secret v{version} (len={len(secret)})")
            self._cached_secret = (secret, version)
            return self._cached_secret
        except Exception as e:
            logger.error(f"Spotify: failed to fetch secret: {e}")
            return None

    @staticmethod
    def _generate_totp(server_time_s: int, secret: str) -> str:
        """Generate a 6-digit TOTP code compatible with Spotify's auth.
        Uses HMAC-SHA1 with a 30-second period, matching the Go reference.
        """
        counter = server_time_s // 30  # standard TOTP 30s time step
        counter_bytes = struct.pack(">Q", counter)  # big-endian 64-bit counter
        mac = hmac.new(secret.encode(), counter_bytes, hashlib.sha1).digest()
        # Dynamic truncation (HOTP-style): low nibble of the last byte picks
        # a 4-byte window, masked to 31 bits.
        offset = mac[-1] & 0x0F
        binary_code = (
            (mac[offset] & 0x7F) << 24
            | (mac[offset + 1] & 0xFF) << 16
            | (mac[offset + 2] & 0xFF) << 8
            | (mac[offset + 3] & 0xFF)
        )
        code = binary_code % (10**6)
        return str(code).zfill(6)

    def _load_cached_token(self) -> Optional[str]:
        """Try to load a valid token from the persistent cache file."""
        try:
            with open(SPOTIFY_TOKEN_CACHE_FILE, "r") as f:
                data = json.load(f)
            expires_ms = data.get("accessTokenExpirationTimestampMs", 0)
            if expires_ms <= int(time.time() * 1000):
                logger.debug("Spotify: persisted token expired")
                return None
            token = data.get("accessToken", "")
            if not token:
                return None
            # Hydrate the in-memory cache alongside returning the token.
            self._cached_token = token
            self._token_expires_at = expires_ms / 1000.0
            logger.debug("Spotify: loaded token from cache file")
            return token
        except (FileNotFoundError, json.JSONDecodeError, KeyError):
            return None

    def _save_token(self, body: dict) -> None:
        """Persist the token response to disk (best effort)."""
        try:
            with open(SPOTIFY_TOKEN_CACHE_FILE, "w") as f:
                json.dump(body, f)
            logger.debug("Spotify: token saved to cache file")
        except Exception as e:
            logger.warning(f"Spotify: failed to write token cache: {e}")

    def _get_token(self) -> Optional[str]:
        """Obtain a Spotify access token. Cached in memory and on disk.
        Requires SP_DC cookie (set via SPOTIFY_SP_DC env var).
        """
        # 1. Memory cache (30s safety margin before actual expiry)
        if self._cached_token and time.time() < self._token_expires_at - 30:
            logger.debug("Spotify: using in-memory cached token")
            return self._cached_token
        # 2. Disk cache
        disk_token = self._load_cached_token()
        if disk_token and time.time() < self._token_expires_at - 30:
            return disk_token
        # 3. Fetch new token
        if not SPOTIFY_SP_DC:
            logger.error(
                "Spotify: SPOTIFY_SP_DC env var not set — "
                "cannot authenticate with Spotify"
            )
            return None
        headers = {
            "User-Agent": UA_BROWSER,
            "Accept": "*/*",
            "Referer": "https://open.spotify.com/",
            "Cookie": f"sp_dc={SPOTIFY_SP_DC}",
        }
        with httpx.Client(headers=headers) as client:
            server_time = self._get_server_time(client)
            if server_time is None:
                return None
            secret_data = self._get_secret(client)
            if secret_data is None:
                return None
            secret, version = secret_data
            totp = self._generate_totp(server_time, secret)
            logger.debug(f"Spotify: generated TOTP v{version}: {totp}")
            params = {
                "reason": "init",
                "productType": "web-player",
                "totp": totp,
                "totpVer": str(version),
                "totpServer": totp,
            }
            try:
                res = client.get(SPOTIFY_TOKEN_URL, params=params, timeout=HTTP_TIMEOUT)
                if res.status_code != 200:
                    logger.error(f"Spotify: token request returned {res.status_code}")
                    return None
                body = res.json()
                if not isinstance(body, dict) or "accessToken" not in body:
                    logger.error(
                        f"Spotify: unexpected token response keys: {list(body.keys()) if isinstance(body, dict) else type(body).__name__}"
                    )
                    return None
                token = body["accessToken"]
                is_anonymous = body.get("isAnonymous", False)
                if is_anonymous:
                    logger.warning(
                        "Spotify: received anonymous token — SP_DC may be invalid"
                    )
                expires_ms = body.get("accessTokenExpirationTimestampMs", 0)
                if expires_ms and expires_ms > int(time.time() * 1000):
                    self._token_expires_at = expires_ms / 1000.0
                else:
                    # Fall back to a conservative one-hour lifetime.
                    logger.warning("Spotify: token expiry missing or invalid")
                    self._token_expires_at = time.time() + 3600
                self._cached_token = token
                # Persist to disk (including anonymous tokens, same as Go ref)
                self._save_token(body)
                logger.debug("Spotify: obtained access token")
                return token
            except Exception as e:
                logger.error(f"Spotify: token request failed: {e}")
                return None

    # ─── Lyrics ──────────────────────────────────────────────────────
    @staticmethod
    def _format_lrc_line(start_ms: int, words: str) -> str:
        """Format a single lyric line as LRC ``[mm:ss.cc]text``.

        Rounds to the nearest centisecond and carries the rounding into the
        seconds/minutes fields. (Fix: the previous per-field rounding could
        produce a malformed ``[00:05.100]`` for e.g. 5999 ms instead of
        rolling over to ``[00:06.00]``.)
        """
        total_cs = round(start_ms / 10.0)
        minutes, rem_cs = divmod(total_cs, 6000)
        seconds, centiseconds = divmod(rem_cs, 100)
        return f"[{minutes:02d}:{seconds:02d}.{centiseconds:02d}]{words}"

    @staticmethod
    def _is_truly_synced(lines: list[dict]) -> bool:
        """Check if lyrics are actually synced (not all timestamps zero)."""
        for line in lines:
            try:
                ms = int(line.get("startTimeMs", "0"))
                if ms > 0:
                    return True
            except (ValueError, TypeError):
                continue
        return False

    def fetch(
        self, track: TrackMeta, bypass_cache: bool = False
    ) -> Optional[LyricResult]:
        """Fetch lyrics for a Spotify track by its track ID.

        Converts the API's line list into LRC text; unsynced lyrics are
        emitted with all-zero timestamps.
        """
        if not track.trackid:
            logger.debug("Spotify: skipped — no trackid in metadata")
            return None
        logger.info(f"Spotify: fetching lyrics for trackid={track.trackid}")
        token = self._get_token()
        if not token:
            logger.error("Spotify: cannot fetch lyrics without a token")
            return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)
        url = f"{SPOTIFY_LYRICS_URL}{track.trackid}?format=json&vocalRemoval=false&market=from_token"
        headers = {
            "User-Agent": UA_BROWSER,
            "Accept": "application/json",
            "Authorization": f"Bearer {token}",
            "Referer": "https://open.spotify.com/",
            "App-Platform": "WebPlayer",
            "Spotify-App-Version": SPOTIFY_APP_VERSION,
            "Origin": "https://open.spotify.com",
        }
        try:
            with httpx.Client(timeout=HTTP_TIMEOUT) as client:
                res = client.get(url, headers=headers)
                if res.status_code == 404:
                    logger.debug(f"Spotify: 404 for trackid={track.trackid}")
                    return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
                if res.status_code != 200:
                    logger.error(f"Spotify: lyrics API returned {res.status_code}")
                    return LyricResult(
                        status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR
                    )
                data = res.json()
                # Validate response structure
                if not isinstance(data, dict) or "lyrics" not in data:
                    logger.error("Spotify: unexpected lyrics response structure")
                    return LyricResult(
                        status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR
                    )
                lyrics_data = data["lyrics"]
                sync_type = lyrics_data.get("syncType", "")
                lines = lyrics_data.get("lines", [])
                if not isinstance(lines, list) or len(lines) == 0:
                    logger.debug("Spotify: response contained no lyric lines")
                    return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
                # Determine sync status
                # syncType == "LINE_SYNCED" AND at least one non-zero timestamp
                is_synced = sync_type == "LINE_SYNCED" and self._is_truly_synced(lines)
                # Convert to LRC
                lrc_lines: list[str] = []
                for line in lines:
                    words = line.get("words", "")
                    if not isinstance(words, str):
                        continue
                    try:
                        ms = int(line.get("startTimeMs", "0"))
                    except (ValueError, TypeError):
                        ms = 0
                    if is_synced:
                        lrc_lines.append(self._format_lrc_line(ms, words))
                    else:
                        # Unsynced: emit with zero timestamps
                        lrc_lines.append(f"[00:00.00]{words}")
                content = normalize_tags("\n".join(lrc_lines))
                status = (
                    CacheStatus.SUCCESS_SYNCED
                    if is_synced
                    else CacheStatus.SUCCESS_UNSYNCED
                )
                logger.info(f"Spotify: got {status.value} lyrics ({len(lrc_lines)} lines)")
                return LyricResult(status=status, lyrics=content, source=self.source_name)
        except Exception as e:
            logger.error(f"Spotify: lyrics fetch failed: {e}")
            return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)