From 9d6c2e21138a489d04c78f32ab057d1d9810b546 Mon Sep 17 00:00:00 2001 From: Uyanide Date: Sat, 4 Apr 2026 16:02:19 +0200 Subject: [PATCH] feat: add musixmatch as fetcher --- README.md | 18 +- lrx_cli/config.py | 9 +- lrx_cli/fetchers/__init__.py | 10 +- lrx_cli/fetchers/musixmatch.py | 317 +++++++++++++++++++++++++++++++++ pyproject.toml | 2 +- uv.lock | 2 +- 6 files changed, 346 insertions(+), 12 deletions(-) create mode 100644 lrx_cli/fetchers/musixmatch.py diff --git a/README.md b/README.md index adf1f3a..bb77efd 100644 --- a/README.md +++ b/README.md @@ -12,11 +12,15 @@ Sources are queried in order. High-confidence results (exact match or manual ins 1. **Local** — sidecar `.lrc` files or embedded audio metadata (FLAC, MP3) 2. **Cache Search** — fuzzy cross-album lookup in local cache -3. **Spotify** — synced lyrics via Spotify's API (requires `SPOTIFY_SP_DC`) +3. **Spotify** — synced lyrics via Spotify's API (requires `SPOTIFY_SP_DC` and Spotify trackid) 4. **LRCLIB** — exact match from [lrclib.net](https://lrclib.net) (requires full metadata) -5. **LRCLIB Search** — fuzzy search from lrclib.net (requires at least a title) -6. **Netease** — Netease Cloud Music public API -7. **QQ Music** — QQ Music via self-hosted API proxy (requires `QQ_MUSIC_API_URL` that provides the same interface as [tooplick/qq-music-api](https://github.com/tooplick/qq-music-api)) +5. **Musixmatch (Spotify)** — Musixmatch API with Spotify trackid (requires `MUSIXMATCH_USERTOKEN` and Spotify trackid) +6. **LRCLIB Search** — fuzzy search from lrclib.net (requires at least a title) +7. **Musixmatch** — Musixmatch API with metadata search (requires `MUSIXMATCH_USERTOKEN` and at least a title) +8. **Netease** — Netease Cloud Music public API +9. 
**QQ Music** — QQ Music via self-hosted API proxy (requires `QQ_MUSIC_API_URL` that provides the same interface as [tooplick/qq-music-api](https://github.com/tooplick/qq-music-api)) + +> I'm aware that Spotify's lyrics are provided by Musixmatch, but the fact is that Musixmatch's own search will yield different (and more) results than Spotify's, so I treat them as separate sources. ## Usage @@ -28,7 +32,7 @@ See `lrx --help` for full command reference. Common use cases: lrx fetch ``` - using a specific player or source to fetch from: + targeting a specific player and a source to fetch from: ```bash lrx --player mpd fetch --method lrclib-search @@ -41,7 +45,7 @@ See `lrx --help` for full command reference. Common use cases: lrx search --trackid "5p0ietGkLNEqx1Z7ijkw5g" ``` - or for a local file: + or by path to a local audio file: ```bash lrx search --path "/path/to/Westlife - My Love.flac" @@ -75,11 +79,13 @@ Set credentials via environment variable or `.env` file: ```env SPOTIFY_SP_DC=your_cookie_value +MUSIXMATCH_USERTOKEN=your_musixmatch_usertoken QQ_MUSIC_API_URL=https://api.example.com PREFERRED_PLAYER=spotify ``` - `SPOTIFY_SP_DC` — required for Spotify source. Defaults to empty (disabled Spotify source). +- `MUSIXMATCH_USERTOKEN` — required for Musixmatch sources ([Curators Settings Page](https://curators.musixmatch.com/settings) -> Login (if required) -> "Copy debug info") - `QQ_MUSIC_API_URL` — required for QQ Music source. Defaults to empty (disabled QQ Music source). - `PREFERRED_PLAYER` — preferred MPRIS player when multiple are active. Defaults to `spotify`. Only used when no `--player` flag is given and more than one player (or none of them) is currently playing. 
diff --git a/lrx_cli/config.py b/lrx_cli/config.py index 03a5c61..b61566d 100644 --- a/lrx_cli/config.py +++ b/lrx_cli/config.py @@ -67,7 +67,7 @@ SPOTIFY_SECRET_URL = ( ) SPOTIFY_SP_DC = os.environ.get("SPOTIFY_SP_DC", "") SPOTIFY_TOKEN_CACHE_FILE = os.path.join(CACHE_DIR, "spotify_token.json") -SPOTIFY_APP_VERSION = "1.2.87.284.g3ff41c13" +SPOTIFY_APP_VERSION = "1.2.88.21.g8e037c8f" # Netease api NETEASE_SEARCH_URL = "https://music.163.com/api/cloudsearch/pc" @@ -80,11 +80,16 @@ LRCLIB_SEARCH_URL = "https://lrclib.net/api/search" # QQ Music API (self-hosted proxy) QQ_MUSIC_API_URL = os.environ.get("QQ_MUSIC_API_URL", "").rstrip("/") +# Musixmatch desktop API +MUSIXMATCH_USERTOKEN = os.environ.get("MUSIXMATCH_USERTOKEN", "") +MUSIXMATCH_SEARCH_URL = "https://apic-desktop.musixmatch.com/ws/1.1/track.search" +MUSIXMATCH_MACRO_URL = "https://apic-desktop.musixmatch.com/ws/1.1/macro.subtitles.get" + # Player preference (used when multiple MPRIS players are active) PREFERRED_PLAYER = os.environ.get("PREFERRED_PLAYER", "spotify") # User-Agents -UA_BROWSER = "Mozilla/5.0 (X11; Linux x86_64; rv:148.0) Gecko/20100101 Firefox/148.0" +UA_BROWSER = "Mozilla/5.0 (X11; Linux x86_64; rv:149.0) Gecko/20100101 Firefox/149.0" UA_LRX = f"LRX-CLI {APP_VERSION} (https://github.com/Uyanide/lrx-cli)" os.makedirs(CACHE_DIR, exist_ok=True) diff --git a/lrx_cli/fetchers/__init__.py b/lrx_cli/fetchers/__init__.py index 9dd199e..c062864 100644 --- a/lrx_cli/fetchers/__init__.py +++ b/lrx_cli/fetchers/__init__.py @@ -13,6 +13,7 @@ from .cache_search import CacheSearchFetcher from .spotify import SpotifyFetcher from .lrclib import LrclibFetcher from .lrclib_search import LrclibSearchFetcher +from .musixmatch import MusixmatchFetcher, MusixmatchSpotifyFetcher from .netease import NeteaseFetcher from .qqmusic import QQMusicFetcher from ..cache import CacheEngine @@ -23,9 +24,11 @@ FetcherMethodType = Literal[ "cache-search", "spotify", "lrclib", + "musixmatch-spotify", "lrclib-search", 
"netease", "qqmusic", + "musixmatch", ] # Fetchers within a group run in parallel; groups run sequentially. @@ -34,8 +37,9 @@ _FETCHER_GROUPS: list[list[FetcherMethodType]] = [ ["local"], ["cache-search"], ["spotify"], - ["lrclib"], - ["lrclib-search", "netease", "qqmusic"], + ["lrclib", "musixmatch-spotify"], + ["lrclib-search", "musixmatch"], + ["netease", "qqmusic"], ] @@ -46,9 +50,11 @@ def create_fetchers(cache: CacheEngine) -> dict[FetcherMethodType, BaseFetcher]: "cache-search": CacheSearchFetcher(cache), "spotify": SpotifyFetcher(), "lrclib": LrclibFetcher(), + "musixmatch-spotify": MusixmatchSpotifyFetcher(), "lrclib-search": LrclibSearchFetcher(), "netease": NeteaseFetcher(), "qqmusic": QQMusicFetcher(), + "musixmatch": MusixmatchFetcher(), } return fetchers diff --git a/lrx_cli/fetchers/musixmatch.py b/lrx_cli/fetchers/musixmatch.py new file mode 100644 index 0000000..5f07562 --- /dev/null +++ b/lrx_cli/fetchers/musixmatch.py @@ -0,0 +1,317 @@ +""" +Author: Uyanide pywang0608@foxmail.com +Date: 2026-04-04 15:28:34 +Description: Musixmatch fetchers (desktop API, usertoken auth) +""" + +""" +Uses the Musixmatch desktop API (apic-desktop.musixmatch.com). +Requires MUSIXMATCH_USERTOKEN from https://curators.musixmatch.com/settings +→ "Copy debug info" → find UserToken. 
+ +Two fetchers: + musixmatch-spotify — direct lookup by Spotify track ID (exact, no search) + musixmatch — metadata search + multi-candidate fallback +""" + +import json +from typing import Optional +from urllib.parse import urlencode + +import httpx +from loguru import logger + +from .base import BaseFetcher +from .selection import SearchCandidate, select_best +from ..lrc import LRCData +from ..models import CacheStatus, LyricResult, TrackMeta +from ..config import ( + HTTP_TIMEOUT, + MUSIXMATCH_MACRO_URL, + MUSIXMATCH_SEARCH_URL, + MUSIXMATCH_USERTOKEN, + TTL_NETWORK_ERROR, + TTL_NOT_FOUND, +) + +_MXM_HEADERS = {"Cookie": "x-mxm-token-guid="} + +_MXM_MACRO_BASE_PARAMS: dict[str, str] = { + "format": "json", + "namespace": "lyrics_richsynched", + "subtitle_format": "mxm", + "optional_calls": "track.richsync", + "app_id": "web-desktop-app-v1.0", +} + + +def _format_ts(s: float) -> str: + mm = int(s) // 60 + ss = int(s) % 60 + cs = min(round((s % 1) * 100), 99) + return f"[{mm:02d}:{ss:02d}.{cs:02d}]" + + +def _parse_richsync(body: str) -> Optional[str]: + """Parse richsync JSON body → LRC text. Each entry: {"ts": float, "x": str}.""" + try: + data = json.loads(body) + if not isinstance(data, list): + return None + lines = [] + for entry in data: + if not isinstance(entry, dict): + continue + ts = entry.get("ts") + x = entry.get("x") + if not isinstance(ts, (int, float)) or not isinstance(x, str): + continue + lines.append(f"{_format_ts(float(ts))}{x}") + return "\n".join(lines) if lines else None + except Exception: + return None + + +def _parse_subtitle(body: str) -> Optional[str]: + """Parse subtitle JSON body → LRC text. 
def _dig(node: object, *keys: str) -> object:
    """Walk nested dicts by *keys*, returning None as soon as a level is not a dict."""
    for key in keys:
        if not isinstance(node, dict):
            return None
        node = node.get(key)
    return node


def _ok_message(macro_calls: dict, call_name: str) -> Optional[dict]:
    """Return the ``message`` dict of sub-call *call_name* if its header status_code is 200."""
    message = _dig(macro_calls, call_name, "message")
    if isinstance(message, dict) and _dig(message, "header", "status_code") == 200:
        return message
    return None


async def _fetch_macro(
    client: httpx.AsyncClient,
    params: dict[str, str],
) -> Optional[LRCData]:
    """Call macro.subtitles.get and extract synced lyrics from the response.

    Args:
        client: HTTP client used for the request.
        params: request-specific query parameters; merged over
            ``_MXM_MACRO_BASE_PARAMS`` (request values win on key clashes).

    Returns:
        LRCData built from the richsync payload when present and usable,
        otherwise from the first subtitle entry; None when the response
        carries no usable lyrics or has an unexpected shape.

    Raises:
        Whatever ``client.get`` raises on transport failure, the error from
        ``raise_for_status()`` on an HTTP error status, and the decoding
        error from ``resp.json()`` when the body is not JSON.
    """
    merged = {**_MXM_MACRO_BASE_PARAMS, **params}
    url = f"{MUSIXMATCH_MACRO_URL}?{urlencode(merged)}"
    # Only the parameter names are logged; values include the user token.
    logger.debug(f"Musixmatch: macro call with {list(params.keys())}")

    resp = await client.get(url, headers=_MXM_HEADERS)
    resp.raise_for_status()

    data = resp.json()
    # Musixmatch returns body=[] (not {}) when the track is not found, so every
    # level is type-checked instead of chaining .get() calls.
    macro_calls = _dig(data, "message", "body", "macro_calls")
    if not isinstance(macro_calls, dict):
        return None

    # Prefer richsync (word-level timing)
    richsync_msg = _ok_message(macro_calls, "track.richsync.get")
    if richsync_msg is not None:
        richsync_body = _dig(richsync_msg, "body", "richsync", "richsync_body")
        if isinstance(richsync_body, str):
            lrc_text = _parse_richsync(richsync_body)
            if lrc_text:
                lrc = LRCData(lrc_text)
                # NOTE(review): relies on LRCData truthiness (defined elsewhere);
                # presumably falsy when nothing could be parsed — confirm.
                if lrc:
                    logger.debug("Musixmatch: got richsync lyrics")
                    return lrc

    # Fall back to subtitle (line-level timing)
    subtitle_msg = _ok_message(macro_calls, "track.subtitles.get")
    if subtitle_msg is not None:
        subtitle_list = _dig(subtitle_msg, "body", "subtitle_list")
        if isinstance(subtitle_list, list) and subtitle_list:
            # Only the first subtitle entry is considered.
            subtitle_body = _dig(subtitle_list[0], "subtitle", "subtitle_body")
            if isinstance(subtitle_body, str):
                lrc_text = _parse_subtitle(subtitle_body)
                if lrc_text:
                    lrc = LRCData(lrc_text)
                    if lrc:
                        logger.debug("Musixmatch: got subtitle lyrics")
                        return lrc

    logger.debug("Musixmatch: no usable lyrics in macro response")
    return None
class MusixmatchSpotifyFetcher(BaseFetcher):
    """Direct lookup by Spotify track ID — no search, single request."""

    @property
    def source_name(self) -> str:
        """Name reported as the ``source`` of results produced by this fetcher."""
        return "musixmatch-spotify"

    def is_available(self, track: TrackMeta) -> bool:
        """Return True only when the track has a track ID and a user token is configured."""
        return bool(track.trackid) and bool(MUSIXMATCH_USERTOKEN)

    @staticmethod
    def _redact_token(error: object) -> str:
        """Render *error* as text with the configured user token masked.

        httpx status errors include the request URL in their message, and the
        URL carries the token as a query parameter, so the raw message must
        not be logged.
        """
        text = str(error)
        if MUSIXMATCH_USERTOKEN:  # str.replace("") would match between every character
            encoded = urlencode({"usertoken": MUSIXMATCH_USERTOKEN})
            text = text.replace(encoded, "usertoken=***")
            text = text.replace(MUSIXMATCH_USERTOKEN, "***")
        return text

    async def fetch(
        self, track: TrackMeta, bypass_cache: bool = False
    ) -> Optional[LyricResult]:
        """Fetch synced lyrics for *track* using its track ID.

        Args:
            track: track metadata; ``track.trackid`` is sent as ``track_spotify_id``.
            bypass_cache: accepted for interface compatibility; not used here.

        Returns:
            A NETWORK_ERROR result when the request raises, a NOT_FOUND result
            when the response has no usable lyrics, otherwise a SUCCESS_SYNCED
            result carrying the lyrics and this fetcher's source name.
        """
        logger.info(f"Musixmatch-Spotify: fetching lyrics for {track.display_name()}")
        try:
            async with httpx.AsyncClient(timeout=HTTP_TIMEOUT) as client:
                lrc = await _fetch_macro(
                    client,
                    {
                        "track_spotify_id": str(track.trackid),
                        "usertoken": MUSIXMATCH_USERTOKEN,
                    },
                )
        except Exception as e:
            logger.error(
                f"Musixmatch-Spotify: fetch failed: {self._redact_token(e)}"
            )
            return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)

        if lrc is None:
            logger.debug(
                f"Musixmatch-Spotify: no lyrics found for {track.display_name()}"
            )
            return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)

        logger.info(f"Musixmatch-Spotify: got SUCCESS_SYNCED lyrics ({len(lrc)} lines)")
        return LyricResult(
            status=CacheStatus.SUCCESS_SYNCED,
            lyrics=lrc,
            source=self.source_name,
        )
class MusixmatchFetcher(BaseFetcher):
    """Metadata search + multi-candidate fallback."""

    @property
    def source_name(self) -> str:
        """Name reported as the ``source`` of results produced by this fetcher."""
        return "musixmatch"

    def is_available(self, track: TrackMeta) -> bool:
        """Return True only when the track has a title and a user token is configured."""
        return bool(track.title) and bool(MUSIXMATCH_USERTOKEN)

    @staticmethod
    def _redact_token(error: object) -> str:
        """Render *error* as text with the configured user token masked.

        httpx status errors include the request URL in their message, and the
        URL carries the token as a query parameter, so the raw message must
        not be logged.
        """
        text = str(error)
        if MUSIXMATCH_USERTOKEN:  # str.replace("") would match between every character
            encoded = urlencode({"usertoken": MUSIXMATCH_USERTOKEN})
            text = text.replace(encoded, "usertoken=***")
            text = text.replace(MUSIXMATCH_USERTOKEN, "***")
        return text

    @staticmethod
    def _duration_ms(raw: object) -> Optional[float]:
        """Convert a ``track_length`` value to milliseconds (value * 1000).

        Returns None for missing/zero values and for values that cannot be
        converted, so one malformed entry does not abort the whole search.
        """
        if not raw:
            return None
        try:
            return float(raw) * 1000  # type: ignore[arg-type]
        except (TypeError, ValueError):
            return None

    async def _search_candidates(
        self, client: httpx.AsyncClient, track: TrackMeta
    ) -> tuple[Optional[int], float]:
        """Run track.search on *client* and pick the best candidate.

        Returns:
            ``(commontrack_id, confidence)`` from ``select_best``, or
            ``(None, 0.0)`` when the response contains no track list.

        Raises:
            Transport, HTTP-status and JSON-decoding errors from the request;
            the caller decides how to report them.
        """
        params: dict[str, str] = {
            "format": "json",
            "app_id": "web-desktop-app-v1.0",
            "q_track": track.title or "",
            "usertoken": MUSIXMATCH_USERTOKEN,
            "page_size": "10",
            "f_has_lyrics": "1",
        }
        if track.artist:
            params["q_artist"] = track.artist
        if track.album:
            params["q_album"] = track.album

        url = f"{MUSIXMATCH_SEARCH_URL}?{urlencode(params)}"
        logger.debug(f"Musixmatch: searching for '{track.display_name()}'")

        resp = await client.get(url, headers=_MXM_HEADERS)
        resp.raise_for_status()
        data = resp.json()

        # Each level is type-checked: the API may return a non-dict body
        # (e.g. []) when nothing is found, which is "no results", not an error.
        message = data.get("message") if isinstance(data, dict) else None
        body = message.get("body") if isinstance(message, dict) else None
        track_list = body.get("track_list") if isinstance(body, dict) else None
        if not isinstance(track_list, list) or not track_list:
            logger.debug("Musixmatch: search returned 0 results")
            return None, 0.0

        logger.debug(f"Musixmatch: search returned {len(track_list)} candidates")

        candidates = []
        for item in track_list:
            info = item.get("track", {}) if isinstance(item, dict) else None
            if not isinstance(info, dict):
                continue
            track_id = info.get("commontrack_id")
            # Entries without an integer id, and instrumentals, are not candidates.
            if not isinstance(track_id, int) or info.get("instrumental"):
                continue
            candidates.append(
                SearchCandidate(
                    item=int(track_id),
                    duration_ms=self._duration_ms(info.get("track_length")),
                    is_synced=bool(
                        info.get("has_subtitles") or info.get("has_richsync")
                    ),
                    title=info.get("track_name"),
                    artist=info.get("artist_name"),
                    album=info.get("album_name"),
                )
            )

        best_id, confidence = select_best(
            candidates,
            track.length,
            title=track.title,
            artist=track.artist,
            album=track.album,
        )
        if best_id is not None:
            logger.debug(
                f"Musixmatch: best candidate id={best_id} ({confidence:.0f})"
            )
        else:
            logger.debug("Musixmatch: no suitable candidate found")
        return best_id, confidence

    async def _search(self, track: TrackMeta) -> tuple[Optional[int], float]:
        """Best-effort search kept for compatibility with the original contract.

        Opens its own client and never raises: any failure is logged and
        reported as ``(None, 0.0)``. ``fetch`` does not use this wrapper,
        because it needs to tell a failed request apart from "no match".
        """
        try:
            async with httpx.AsyncClient(timeout=HTTP_TIMEOUT) as client:
                return await self._search_candidates(client, track)
        except Exception as e:
            logger.error(f"Musixmatch: search failed: {self._redact_token(e)}")
            return None, 0.0

    async def fetch(
        self, track: TrackMeta, bypass_cache: bool = False
    ) -> Optional[LyricResult]:
        """Search for *track* by metadata and fetch lyrics for the best match.

        Args:
            track: track metadata used for the search and candidate selection.
            bypass_cache: accepted for interface compatibility; not used here.

        Returns:
            A NETWORK_ERROR result when either the search or the lyrics request
            raises, a NOT_FOUND result when no candidate is selected or the
            selected track has no usable lyrics, otherwise a SUCCESS_SYNCED
            result carrying the lyrics, source name and selection confidence.
        """
        logger.info(f"Musixmatch: fetching lyrics for {track.display_name()}")
        stage = "search"  # which request failed, for the error log
        try:
            # One client serves both requests.
            async with httpx.AsyncClient(timeout=HTTP_TIMEOUT) as client:
                commontrack_id, confidence = await self._search_candidates(
                    client, track
                )
                if commontrack_id is None:
                    logger.debug(
                        f"Musixmatch: no match found for {track.display_name()}"
                    )
                    return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)

                stage = "fetch"
                lrc = await _fetch_macro(
                    client,
                    {
                        "commontrack_id": str(commontrack_id),
                        "usertoken": MUSIXMATCH_USERTOKEN,
                    },
                )
        except Exception as e:
            # A failed request must not be reported as NOT_FOUND: that would
            # apply the not-found TTL to what may be a transient outage.
            logger.error(f"Musixmatch: {stage} failed: {self._redact_token(e)}")
            return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)

        if lrc is None:
            logger.debug(f"Musixmatch: no lyrics for commontrack_id={commontrack_id}")
            return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)

        logger.info(
            f"Musixmatch: got SUCCESS_SYNCED lyrics "
            f"for commontrack_id={commontrack_id} ({len(lrc)} lines)"
        )
        return LyricResult(
            status=CacheStatus.SUCCESS_SYNCED,
            lyrics=lrc,
            source=self.source_name,
            confidence=confidence,
        )