""" Author: Uyanide pywang0608@foxmail.com Date: 2026-03-25 10:43:21 Description: Spotify fetcher — obtains synced lyrics via Spotify's internal color-lyrics API. """ """ Authentication flow: 1. Fetch server time from Spotify 2. Fetch TOTP secret 3. Generate a TOTP code and exchange it (with SP_DC cookie) for an access token 4. Request lyrics using the access token The secret and token are cached on the instance to avoid redundant network calls within the same session. Requires SPOTIFY_SP_DC environment variable to be set. """ import httpx import json import time import struct import hmac import hashlib from typing import Optional, Tuple from loguru import logger from .base import BaseFetcher from ..models import TrackMeta, LyricResult, CacheStatus from ..lrc import normalize_tags from ..config import ( HTTP_TIMEOUT, SPOTIFY_APP_VERSION, TTL_NOT_FOUND, TTL_NETWORK_ERROR, SPOTIFY_TOKEN_URL, SPOTIFY_LYRICS_URL, SPOTIFY_SERVER_TIME_URL, SPOTIFY_SECRET_URL, SPOTIFY_SP_DC, SPOTIFY_TOKEN_CACHE_FILE, UA_BROWSER, ) class SpotifyFetcher(BaseFetcher): def __init__(self) -> None: # Session-level caches to avoid refetching within the same run self._cached_secret: Optional[Tuple[str, int]] = None self._cached_token: Optional[str] = None self._token_expires_at: float = 0.0 @property def source_name(self) -> str: return "spotify" def is_available(self, track: TrackMeta) -> bool: return bool(track.trackid) and bool(SPOTIFY_SP_DC) # ─── Auth helpers ──────────────────────────────────────────────── def _get_server_time(self, client: httpx.Client) -> Optional[int]: """Fetch Spotify's server timestamp (seconds since epoch).""" try: res = client.get(SPOTIFY_SERVER_TIME_URL, timeout=HTTP_TIMEOUT) res.raise_for_status() data = res.json() if not isinstance(data, dict) or "serverTime" not in data: logger.error(f"Spotify: unexpected server-time response: {data}") return None server_time = data["serverTime"] logger.debug(f"Spotify: server time = {server_time}") return server_time except Exception as e: logger.error(f"Spotify: failed to fetch server time: {e}") return None def _get_secret(self, client: httpx.Client) -> Optional[Tuple[str, int]]: """Fetch and decode the TOTP secret. Cached after first success. Response format: [{version: int, secret: str}, ...] Each character in *secret* is XOR-decoded with ``(index % 33) + 9``. """ if self._cached_secret is not None: logger.debug("Spotify: using cached TOTP secret") return self._cached_secret try: res = client.get(SPOTIFY_SECRET_URL, timeout=HTTP_TIMEOUT) res.raise_for_status() data = res.json() if not isinstance(data, list) or len(data) == 0: logger.error( f"Spotify: unexpected secrets response (type={type(data).__name__}, len={len(data) if isinstance(data, list) else '?'})" ) return None last = data[-1] if "secret" not in last or "version" not in last: logger.error(f"Spotify: malformed secret entry: {list(last.keys())}") return None secret_raw = last["secret"] version = last["version"] # XOR decode parts = [] for i, char in enumerate(secret_raw): parts.append(str(ord(char) ^ ((i % 33) + 9))) secret = "".join(parts) logger.debug(f"Spotify: decoded secret v{version} (len={len(secret)})") self._cached_secret = (secret, version) return self._cached_secret except Exception as e: logger.error(f"Spotify: failed to fetch secret: {e}") return None @staticmethod def _generate_totp(server_time_s: int, secret: str) -> str: """Generate a 6-digit TOTP code compatible with Spotify's auth. Uses HMAC-SHA1 with a 30-second period, matching the Go reference. """ counter = server_time_s // 30 counter_bytes = struct.pack(">Q", counter) mac = hmac.new(secret.encode(), counter_bytes, hashlib.sha1).digest() offset = mac[-1] & 0x0F binary_code = ( (mac[offset] & 0x7F) << 24 | (mac[offset + 1] & 0xFF) << 16 | (mac[offset + 2] & 0xFF) << 8 | (mac[offset + 3] & 0xFF) ) code = binary_code % (10**6) return str(code).zfill(6) def _load_cached_token(self) -> Optional[str]: """Try to load a valid token from the persistent cache file.""" try: with open(SPOTIFY_TOKEN_CACHE_FILE, "r") as f: data = json.load(f) expires_ms = data.get("accessTokenExpirationTimestampMs", 0) if expires_ms <= int(time.time() * 1000): logger.debug("Spotify: persisted token expired") return None token = data.get("accessToken", "") if not token: return None self._cached_token = token self._token_expires_at = expires_ms / 1000.0 logger.debug("Spotify: loaded token from cache file") return token except (FileNotFoundError, json.JSONDecodeError, KeyError): return None def _save_token(self, body: dict) -> None: """Persist the token response to disk.""" try: with open(SPOTIFY_TOKEN_CACHE_FILE, "w") as f: json.dump(body, f) logger.debug("Spotify: token saved to cache file") except Exception as e: logger.warning(f"Spotify: failed to write token cache: {e}") def _get_token(self) -> Optional[str]: """Obtain a Spotify access token. Cached in memory and on disk. Requires SP_DC cookie (set via SPOTIFY_SP_DC env var). """ # 1. Memory cache if self._cached_token and time.time() < self._token_expires_at - 30: logger.debug("Spotify: using in-memory cached token") return self._cached_token # 2. Disk cache disk_token = self._load_cached_token() if disk_token and time.time() < self._token_expires_at - 30: return disk_token # 3. Fetch new token if not SPOTIFY_SP_DC: logger.error( "Spotify: SPOTIFY_SP_DC env var not set — " "cannot authenticate with Spotify" ) return None headers = { "User-Agent": UA_BROWSER, "Accept": "*/*", "Referer": "https://open.spotify.com/", "Cookie": f"sp_dc={SPOTIFY_SP_DC}", } with httpx.Client(headers=headers) as client: server_time = self._get_server_time(client) if server_time is None: return None secret_data = self._get_secret(client) if secret_data is None: return None secret, version = secret_data totp = self._generate_totp(server_time, secret) logger.debug(f"Spotify: generated TOTP v{version}: {totp}") params = { "reason": "init", "productType": "web-player", "totp": totp, "totpVer": str(version), "totpServer": totp, } try: res = client.get(SPOTIFY_TOKEN_URL, params=params, timeout=HTTP_TIMEOUT) if res.status_code != 200: logger.error(f"Spotify: token request returned {res.status_code}") return None body = res.json() if not isinstance(body, dict) or "accessToken" not in body: logger.error( f"Spotify: unexpected token response keys: {list(body.keys()) if isinstance(body, dict) else type(body).__name__}" ) return None token = body["accessToken"] is_anonymous = body.get("isAnonymous", False) if is_anonymous: logger.warning( "Spotify: received anonymous token — SP_DC may be invalid" ) expires_ms = body.get("accessTokenExpirationTimestampMs", 0) if expires_ms and expires_ms > int(time.time() * 1000): self._token_expires_at = expires_ms / 1000.0 else: logger.warning("Spotify: token expiry missing or invalid") self._token_expires_at = time.time() + 3600 self._cached_token = token # Persist to disk (including anonymous tokens, same as Go ref) self._save_token(body) logger.debug("Spotify: obtained access token") return token except Exception as e: logger.error(f"Spotify: token request failed: {e}") return None # ─── Lyrics ────────────────────────────────────────────────────── @staticmethod def _format_lrc_line(start_ms: int, words: str) -> str: """Format a single lyric line as LRC ``[mm:ss.cc]text``.""" minutes = start_ms // 60000 seconds = (start_ms // 1000) % 60 centiseconds = round((start_ms % 1000) / 10.0) return f"[{minutes:02d}:{seconds:02d}.{centiseconds:02.0f}]{words}" @staticmethod def _is_truly_synced(lines: list[dict]) -> bool: """Check if lyrics are actually synced (not all timestamps zero).""" for line in lines: try: ms = int(line.get("startTimeMs", "0")) if ms > 0: return True except (ValueError, TypeError): continue return False def fetch( self, track: TrackMeta, bypass_cache: bool = False ) -> Optional[LyricResult]: """Fetch lyrics for a Spotify track by its track ID.""" if not track.trackid: logger.debug("Spotify: skipped — no trackid in metadata") return None logger.info(f"Spotify: fetching lyrics for trackid={track.trackid}") token = self._get_token() if not token: logger.error("Spotify: cannot fetch lyrics without a token") return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR) url = f"{SPOTIFY_LYRICS_URL}{track.trackid}?format=json&vocalRemoval=false&market=from_token" headers = { "User-Agent": UA_BROWSER, "Accept": "application/json", "Authorization": f"Bearer {token}", "Referer": "https://open.spotify.com/", "App-Platform": "WebPlayer", "Spotify-App-Version": SPOTIFY_APP_VERSION, "Origin": "https://open.spotify.com", } try: with httpx.Client(timeout=HTTP_TIMEOUT) as client: res = client.get(url, headers=headers) if res.status_code == 404: logger.debug(f"Spotify: 404 for trackid={track.trackid}") return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND) if res.status_code != 200: logger.error(f"Spotify: lyrics API returned {res.status_code}") return LyricResult( status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR ) data = res.json() # Validate response structure if not isinstance(data, dict) or "lyrics" not in data: logger.error("Spotify: unexpected lyrics response structure") return LyricResult( status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR ) lyrics_data = data["lyrics"] sync_type = lyrics_data.get("syncType", "") lines = lyrics_data.get("lines", []) if not isinstance(lines, list) or len(lines) == 0: logger.debug("Spotify: response contained no lyric lines") return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND) # Determine sync status # syncType == "LINE_SYNCED" AND at least one non-zero timestamp is_synced = sync_type == "LINE_SYNCED" and self._is_truly_synced(lines) # Convert to LRC lrc_lines: list[str] = [] for line in lines: words = line.get("words", "") if not isinstance(words, str): continue try: ms = int(line.get("startTimeMs", "0")) except (ValueError, TypeError): ms = 0 if is_synced: lrc_lines.append(self._format_lrc_line(ms, words)) else: # Unsynced: emit with zero timestamps lrc_lines.append(f"[00:00.00]{words}") content = normalize_tags("\n".join(lrc_lines)) status = ( CacheStatus.SUCCESS_SYNCED if is_synced else CacheStatus.SUCCESS_UNSYNCED ) logger.info(f"Spotify: got {status.value} lyrics ({len(lrc_lines)} lines)") return LyricResult(status=status, lyrics=content, source=self.source_name) except Exception as e: logger.error(f"Spotify: lyrics fetch failed: {e}") return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)