From 9d6c2e21138a489d04c78f32ab057d1d9810b546 Mon Sep 17 00:00:00 2001
From: Uyanide <pywang0608@foxmail.com>
Date: Sat, 4 Apr 2026 16:02:19 +0200
Subject: [PATCH] feat: add musixmatch as fetcher

---
 README.md                      |  18 +-
 lrx_cli/config.py              |   9 +-
 lrx_cli/fetchers/__init__.py   |  10 +-
 lrx_cli/fetchers/musixmatch.py | 317 +++++++++++++++++++++++++++++++++
 pyproject.toml                 |   2 +-
 uv.lock                        |   2 +-
 6 files changed, 346 insertions(+), 12 deletions(-)
 create mode 100644 lrx_cli/fetchers/musixmatch.py

diff --git a/README.md b/README.md
index adf1f3a..bb77efd 100644
--- a/README.md
+++ b/README.md
@@ -12,11 +12,15 @@ Sources are queried in order. High-confidence results (exact match or manual ins
 
 1. **Local** — sidecar `.lrc` files or embedded audio metadata (FLAC, MP3)
 2. **Cache Search** — fuzzy cross-album lookup in local cache
-3. **Spotify** — synced lyrics via Spotify's API (requires `SPOTIFY_SP_DC`)
+3. **Spotify** — synced lyrics via Spotify's API (requires `SPOTIFY_SP_DC` and Spotify trackid)
 4. **LRCLIB** — exact match from [lrclib.net](https://lrclib.net) (requires full metadata)
-5. **LRCLIB Search** — fuzzy search from lrclib.net (requires at least a title)
-6. **Netease** — Netease Cloud Music public API
-7. **QQ Music** — QQ Music via self-hosted API proxy (requires `QQ_MUSIC_API_URL` that provides the same interface as [tooplick/qq-music-api](https://github.com/tooplick/qq-music-api))
+5. **Musixmatch (Spotify)** — Musixmatch API with Spotify trackid (requires `MUSIXMATCH_USERTOKEN` and Spotify trackid)
+6. **LRCLIB Search** — fuzzy search from lrclib.net (requires at least a title)
+7. **Musixmatch** — Musixmatch API with metadata search (requires `MUSIXMATCH_USERTOKEN` and at least a title)
+8. **Netease** — Netease Cloud Music public API
+9. **QQ Music** — QQ Music via self-hosted API proxy (requires `QQ_MUSIC_API_URL` that provides the same interface as [tooplick/qq-music-api](https://github.com/tooplick/qq-music-api))
+
+> Spotify's lyrics are themselves provided by Musixmatch, but Musixmatch's own search yields different (and more) results than Spotify's, so the two are treated as separate sources.
 
 ## Usage
 
@@ -28,7 +32,7 @@ See `lrx --help` for full command reference. Common use cases:
   lrx fetch
   ```
 
-  using a specific player or source to fetch from:
+  targeting a specific player and a source to fetch from:
 
   ```bash
   lrx --player mpd fetch --method lrclib-search
@@ -41,7 +45,7 @@ See `lrx --help` for full command reference. Common use cases:
   lrx search --trackid "5p0ietGkLNEqx1Z7ijkw5g"
   ```
 
-  or for a local file:
+  or by path to a local audio file:
 
   ```bash
   lrx search --path "/path/to/Westlife - My Love.flac"
@@ -75,11 +79,13 @@ Set credentials via environment variable or `.env` file:
 
 ```env
 SPOTIFY_SP_DC=your_cookie_value
+MUSIXMATCH_USERTOKEN=your_musixmatch_usertoken
 QQ_MUSIC_API_URL=https://api.example.com
 PREFERRED_PLAYER=spotify
 ```
 
 - `SPOTIFY_SP_DC` — required for Spotify source. Defaults to empty (disabled Spotify source).
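+- `MUSIXMATCH_USERTOKEN` — required for Musixmatch sources. Defaults to empty (disabled Musixmatch sources). To obtain it, open the [Curators Settings Page](https://curators.musixmatch.com/settings), log in if required, click "Copy debug info", and find `UserToken` in the copied text.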
 - `QQ_MUSIC_API_URL` — required for QQ Music source. Defaults to empty (disabled QQ Music source).
 - `PREFERRED_PLAYER` — preferred MPRIS player when multiple are active. Defaults to `spotify`. Only used when no `--player` flag is given and more than one player (or none of them) is currently playing.
 
diff --git a/lrx_cli/config.py b/lrx_cli/config.py
index 03a5c61..b61566d 100644
--- a/lrx_cli/config.py
+++ b/lrx_cli/config.py
@@ -67,7 +67,7 @@ SPOTIFY_SECRET_URL = (
 )
 SPOTIFY_SP_DC = os.environ.get("SPOTIFY_SP_DC", "")
 SPOTIFY_TOKEN_CACHE_FILE = os.path.join(CACHE_DIR, "spotify_token.json")
-SPOTIFY_APP_VERSION = "1.2.87.284.g3ff41c13"
+SPOTIFY_APP_VERSION = "1.2.88.21.g8e037c8f"
 
 # Netease api
 NETEASE_SEARCH_URL = "https://music.163.com/api/cloudsearch/pc"
@@ -80,11 +80,16 @@ LRCLIB_SEARCH_URL = "https://lrclib.net/api/search"
 # QQ Music API (self-hosted proxy)
 QQ_MUSIC_API_URL = os.environ.get("QQ_MUSIC_API_URL", "").rstrip("/")
 
+# Musixmatch desktop API
+MUSIXMATCH_USERTOKEN = os.environ.get("MUSIXMATCH_USERTOKEN", "")
+MUSIXMATCH_SEARCH_URL = "https://apic-desktop.musixmatch.com/ws/1.1/track.search"
+MUSIXMATCH_MACRO_URL = "https://apic-desktop.musixmatch.com/ws/1.1/macro.subtitles.get"
+
 # Player preference (used when multiple MPRIS players are active)
 PREFERRED_PLAYER = os.environ.get("PREFERRED_PLAYER", "spotify")
 
 # User-Agents
-UA_BROWSER = "Mozilla/5.0 (X11; Linux x86_64; rv:148.0) Gecko/20100101 Firefox/148.0"
+UA_BROWSER = "Mozilla/5.0 (X11; Linux x86_64; rv:149.0) Gecko/20100101 Firefox/149.0"
 UA_LRX = f"LRX-CLI {APP_VERSION} (https://github.com/Uyanide/lrx-cli)"
 
 os.makedirs(CACHE_DIR, exist_ok=True)
diff --git a/lrx_cli/fetchers/__init__.py b/lrx_cli/fetchers/__init__.py
index 9dd199e..c062864 100644
--- a/lrx_cli/fetchers/__init__.py
+++ b/lrx_cli/fetchers/__init__.py
@@ -13,6 +13,7 @@ from .cache_search import CacheSearchFetcher
 from .spotify import SpotifyFetcher
 from .lrclib import LrclibFetcher
 from .lrclib_search import LrclibSearchFetcher
+from .musixmatch import MusixmatchFetcher, MusixmatchSpotifyFetcher
 from .netease import NeteaseFetcher
 from .qqmusic import QQMusicFetcher
 from ..cache import CacheEngine
@@ -23,9 +24,11 @@ FetcherMethodType = Literal[
     "cache-search",
     "spotify",
     "lrclib",
+    "musixmatch-spotify",
     "lrclib-search",
     "netease",
     "qqmusic",
+    "musixmatch",
 ]
 
 # Fetchers within a group run in parallel; groups run sequentially.
@@ -34,8 +37,9 @@ _FETCHER_GROUPS: list[list[FetcherMethodType]] = [
     ["local"],
     ["cache-search"],
     ["spotify"],
-    ["lrclib"],
-    ["lrclib-search", "netease", "qqmusic"],
+    ["lrclib", "musixmatch-spotify"],
+    ["lrclib-search", "musixmatch"],
+    ["netease", "qqmusic"],
 ]
 
 
@@ -46,9 +50,11 @@ def create_fetchers(cache: CacheEngine) -> dict[FetcherMethodType, BaseFetcher]:
         "cache-search": CacheSearchFetcher(cache),
         "spotify": SpotifyFetcher(),
         "lrclib": LrclibFetcher(),
+        "musixmatch-spotify": MusixmatchSpotifyFetcher(),
         "lrclib-search": LrclibSearchFetcher(),
         "netease": NeteaseFetcher(),
         "qqmusic": QQMusicFetcher(),
+        "musixmatch": MusixmatchFetcher(),
     }
     return fetchers
 
diff --git a/lrx_cli/fetchers/musixmatch.py b/lrx_cli/fetchers/musixmatch.py
new file mode 100644
index 0000000..5f07562
--- /dev/null
+++ b/lrx_cli/fetchers/musixmatch.py
@@ -0,0 +1,317 @@
+"""
+Author: Uyanide pywang0608@foxmail.com
+Date: 2026-04-04 15:28:34
+Description: Musixmatch fetchers (desktop API, usertoken auth)
+"""
+
+"""
+Uses the Musixmatch desktop API (apic-desktop.musixmatch.com).
+Requires MUSIXMATCH_USERTOKEN from https://curators.musixmatch.com/settings
+→ "Copy debug info" → find UserToken.
+
+Two fetchers:
+  musixmatch-spotify  — direct lookup by Spotify track ID (exact, no search)
+  musixmatch          — metadata search, then lookup of the best-scoring candidate
+"""
+
+import json
+from typing import Optional
+from urllib.parse import urlencode
+
+import httpx
+from loguru import logger
+
+from .base import BaseFetcher
+from .selection import SearchCandidate, select_best
+from ..lrc import LRCData
+from ..models import CacheStatus, LyricResult, TrackMeta
+from ..config import (
+    HTTP_TIMEOUT,
+    MUSIXMATCH_MACRO_URL,
+    MUSIXMATCH_SEARCH_URL,
+    MUSIXMATCH_USERTOKEN,
+    TTL_NETWORK_ERROR,
+    TTL_NOT_FOUND,
+)
+
+_MXM_HEADERS = {"Cookie": "x-mxm-token-guid="}  # headers for the search and macro requests
+
+_MXM_MACRO_BASE_PARAMS: dict[str, str] = {  # defaults; _fetch_macro overlays its params
+    "format": "json",
+    "namespace": "lyrics_richsynched",
+    "subtitle_format": "mxm",
+    "optional_calls": "track.richsync",
+    "app_id": "web-desktop-app-v1.0",
+}
+
+
+def _format_ts(s: float) -> str:
+    """Format seconds as an LRC ``[mm:ss.cc]`` tag; centiseconds are capped at 99."""
+    minutes, seconds = divmod(int(s), 60)
+    centis = min(99, round(s % 1 * 100))
+    return f"[{minutes:02d}:{seconds:02d}.{centis:02d}]"
+
+
+def _parse_richsync(body: str) -> Optional[str]:
+    """Build LRC text from a richsync JSON array of {"ts": float, "x": str} items."""
+    try:
+        entries = json.loads(body)
+        if not isinstance(entries, list):
+            return None
+        # Items that are not dicts, or lack a numeric "ts" / string "x", are skipped.
+        rendered = [
+            f"{_format_ts(float(item['ts']))}{item['x']}"
+            for item in entries
+            if isinstance(item, dict)
+            and isinstance(item.get("ts"), (int, float))
+            and isinstance(item.get("x"), str)
+        ]
+        return "\n".join(rendered) if rendered else None
+    except Exception:
+        # Invalid JSON or any other failure is treated as "no lyrics".
+        return None
+
+
+def _parse_subtitle(body: str) -> Optional[str]:
+    """Build LRC text from a subtitle JSON array.
+
+    Usable items look like {"text": str, "time": {"total": float}}; others are skipped.
+    """
+    try:
+        entries = json.loads(body)
+        if not isinstance(entries, list):
+            return None
+        rendered = []
+        for item in entries:
+            if not isinstance(item, dict):
+                continue
+            text, stamp = item.get("text"), item.get("time")
+            if isinstance(text, str) and isinstance(stamp, dict):
+                seconds = stamp.get("total")
+                if isinstance(seconds, (int, float)):
+                    rendered.append(f"{_format_ts(float(seconds))}{text}")
+        return "\n".join(rendered) if rendered else None
+    except Exception:
+        return None
+
+
+async def _fetch_macro(
+    client: httpx.AsyncClient,
+    params: dict[str, str],
+) -> Optional[LRCData]:
+    """
+    GET macro.subtitles.get with `params` merged over _MXM_MACRO_BASE_PARAMS.
+    Returns LRCData from richsync if usable, otherwise from the first subtitle;
+    None when the response carries no usable lyrics.
+    Raises on HTTP/network errors (error statuses via raise_for_status()).
+    """
+    merged = {**_MXM_MACRO_BASE_PARAMS, **params}  # caller params win on key clashes
+    url = f"{MUSIXMATCH_MACRO_URL}?{urlencode(merged)}"
+    logger.debug(f"Musixmatch: macro call with {list(params.keys())}")
+
+    resp = await client.get(url, headers=_MXM_HEADERS)
+    resp.raise_for_status()
+
+    data = resp.json()
+    # Musixmatch returns body=[] (not {}) when the track is not found
+    body = data.get("message", {}).get("body", {})
+    if not isinstance(body, dict):
+        return None
+    macro_calls = body.get("macro_calls", {})
+    if not isinstance(macro_calls, dict):
+        return None
+
+    # Prefer richsync (word-level timing, though _parse_richsync reads only ts/x)
+    richsync_msg = macro_calls.get("track.richsync.get", {}).get("message", {})
+    if (
+        isinstance(richsync_msg, dict)
+        and richsync_msg.get("header", {}).get("status_code") == 200
+    ):
+        richsync_body = (
+            richsync_msg.get("body", {}).get("richsync", {}).get("richsync_body")
+        )
+        if isinstance(richsync_body, str):
+            lrc_text = _parse_richsync(richsync_body)
+            if lrc_text:
+                lrc = LRCData(lrc_text)
+                if lrc:  # presumably falsy when LRCData parsed no lines — confirm
+                    logger.debug("Musixmatch: got richsync lyrics")
+                    return lrc
+
+    # Fall back to subtitle (line-level timing); only the first list entry is used
+    subtitle_msg = macro_calls.get("track.subtitles.get", {}).get("message", {})
+    if (
+        isinstance(subtitle_msg, dict)
+        and subtitle_msg.get("header", {}).get("status_code") == 200
+    ):
+        subtitle_list = subtitle_msg.get("body", {}).get("subtitle_list", [])
+        if isinstance(subtitle_list, list) and subtitle_list:
+            subtitle_body = subtitle_list[0].get("subtitle", {}).get("subtitle_body")
+            if isinstance(subtitle_body, str):
+                lrc_text = _parse_subtitle(subtitle_body)
+                if lrc_text:
+                    lrc = LRCData(lrc_text)
+                    if lrc:  # same truthiness check as the richsync path
+                        logger.debug("Musixmatch: got subtitle lyrics")
+                        return lrc
+
+    logger.debug("Musixmatch: no usable lyrics in macro response")
+    return None
+
+
+class MusixmatchSpotifyFetcher(BaseFetcher):
+    """Looks lyrics up by Spotify track ID in one macro request, without searching."""
+
+    @property
+    def source_name(self) -> str:
+        return "musixmatch-spotify"
+
+    def is_available(self, track: TrackMeta) -> bool:
+        # Needs both a Spotify track ID and a configured usertoken.
+        return bool(track.trackid and MUSIXMATCH_USERTOKEN)
+
+    async def fetch(
+        self, track: TrackMeta, bypass_cache: bool = False
+    ) -> Optional[LyricResult]:
+        """Return synced lyrics, NOT_FOUND, or NETWORK_ERROR if the request fails."""
+        logger.info(f"Musixmatch-Spotify: fetching lyrics for {track.display_name()}")
+        try:
+            lookup = {
+                "track_spotify_id": track.trackid,
+                "usertoken": MUSIXMATCH_USERTOKEN,
+            }
+            async with httpx.AsyncClient(timeout=HTTP_TIMEOUT) as client:
+                lrc = await _fetch_macro(client, lookup)  # type: ignore[arg-type]
+        except Exception as exc:
+            logger.error(f"Musixmatch-Spotify: fetch failed: {exc}")
+            return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)
+
+        if lrc is not None:
+            logger.info(
+                f"Musixmatch-Spotify: got SUCCESS_SYNCED lyrics ({len(lrc)} lines)"
+            )
+            return LyricResult(
+                status=CacheStatus.SUCCESS_SYNCED,
+                lyrics=lrc,
+                source=self.source_name,
+            )
+        logger.debug(f"Musixmatch-Spotify: no lyrics found for {track.display_name()}")
+        return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
+
+
+class MusixmatchFetcher(BaseFetcher):
+    """Metadata search, then a lyrics lookup for the best-scoring candidate."""
+
+    @property
+    def source_name(self) -> str:
+        return "musixmatch"
+
+    def is_available(self, track: TrackMeta) -> bool:
+        return bool(track.title) and bool(MUSIXMATCH_USERTOKEN)
+
+    async def _search(self, track: TrackMeta) -> tuple[Optional[int], float]:
+        """Search by metadata; return (best commontrack_id or None, confidence)."""
+        params: dict[str, str] = {
+            "format": "json",
+            "app_id": "web-desktop-app-v1.0",
+            "q_track": track.title or "",
+            "usertoken": MUSIXMATCH_USERTOKEN,
+            "page_size": "10",
+            "f_has_lyrics": "1",
+        }
+        if track.artist:
+            params["q_artist"] = track.artist
+        if track.album:
+            params["q_album"] = track.album
+
+        url = f"{MUSIXMATCH_SEARCH_URL}?{urlencode(params)}"
+        logger.debug(f"Musixmatch: searching for '{track.display_name()}'")
+
+        # Request errors propagate so fetch() reports NETWORK_ERROR, not NOT_FOUND.
+        async with httpx.AsyncClient(timeout=HTTP_TIMEOUT) as client:
+            resp = await client.get(url, headers=_MXM_HEADERS)
+            resp.raise_for_status()
+            data = resp.json()
+        try:
+            body = data.get("message", {}).get("body")  # may be [] rather than a dict
+            track_list = body.get("track_list") if isinstance(body, dict) else None
+            if not isinstance(track_list, list) or not track_list:
+                logger.debug("Musixmatch: search returned 0 results")
+                return None, 0.0
+            logger.debug(f"Musixmatch: search returned {len(track_list)} candidates")
+            candidates = [
+                SearchCandidate(
+                    item=int(t["commontrack_id"]),
+                    duration_ms=(
+                        float(t["track_length"]) * 1000
+                        if t.get("track_length")
+                        else None
+                    ),
+                    is_synced=bool(t.get("has_subtitles") or t.get("has_richsync")),
+                    title=t.get("track_name"),
+                    artist=t.get("artist_name"),
+                    album=t.get("album_name"),
+                )
+                for item in track_list
+                if isinstance(item, dict)
+                and isinstance(t := item.get("track", {}), dict)
+                and isinstance(t.get("commontrack_id"), int)
+                and not t.get("instrumental")
+            ]
+
+            best_id, confidence = select_best(
+                candidates,
+                track.length,
+                title=track.title,
+                artist=track.artist,
+                album=track.album,
+            )
+            if best_id is not None:
+                logger.debug(
+                    f"Musixmatch: best candidate id={best_id} ({confidence:.0f})"
+                )
+            else:
+                logger.debug("Musixmatch: no suitable candidate found")
+            return best_id, confidence
+
+        except Exception as e:
+            logger.error(f"Musixmatch: search failed: {e}")
+            return None, 0.0
+
+    async def fetch(
+        self, track: TrackMeta, bypass_cache: bool = False
+    ) -> Optional[LyricResult]:
+        """Search, then fetch synced lyrics; request failures yield NETWORK_ERROR."""
+        logger.info(f"Musixmatch: fetching lyrics for {track.display_name()}")
+        try:
+            commontrack_id, confidence = await self._search(track)
+            if commontrack_id is None:
+                logger.debug(f"Musixmatch: no match found for {track.display_name()}")
+                return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
+            async with httpx.AsyncClient(timeout=HTTP_TIMEOUT) as client:
+                lrc = await _fetch_macro(
+                    client,
+                    {
+                        "commontrack_id": str(commontrack_id),
+                        "usertoken": MUSIXMATCH_USERTOKEN,
+                    },
+                )
+        except Exception as e:
+            logger.error(f"Musixmatch: fetch failed: {e}")
+            return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)
+
+        if lrc is None:
+            logger.debug(f"Musixmatch: no lyrics for commontrack_id={commontrack_id}")
+            return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
+
+        logger.info(
+            f"Musixmatch: got SUCCESS_SYNCED lyrics "
+            f"for commontrack_id={commontrack_id} ({len(lrc)} lines)"
+        )
+        return LyricResult(
+            status=CacheStatus.SUCCESS_SYNCED,
+            lyrics=lrc,
+            source=self.source_name,
+            confidence=confidence,
+        )
diff --git a/pyproject.toml b/pyproject.toml
index 76e2f84..32ee35d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "lrx-cli"
-version = "0.4.4"
+version = "0.4.5"
 description = "Fetch line-synced lyrics for your music player."
 readme = "README.md"
 requires-python = ">=3.13"
diff --git a/uv.lock b/uv.lock
index 3711ff8..522b9b9 100644
--- a/uv.lock
+++ b/uv.lock
@@ -153,7 +153,7 @@ wheels = [
 
 [[package]]
 name = "lrx-cli"
-version = "0.4.3"
+version = "0.4.4"
 source = { editable = "." }
 dependencies = [
     { name = "cyclopts" },