feat: add musixmatch as fetcher

This commit is contained in:
2026-04-04 16:02:19 +02:00
parent 38dc845abe
commit 9d6c2e2113
6 changed files with 346 additions and 12 deletions
+12 -6
View File
@@ -12,11 +12,15 @@ Sources are queried in order. High-confidence results (exact match or manual ins
1. **Local** — sidecar `.lrc` files or embedded audio metadata (FLAC, MP3) 1. **Local** — sidecar `.lrc` files or embedded audio metadata (FLAC, MP3)
2. **Cache Search** — fuzzy cross-album lookup in local cache 2. **Cache Search** — fuzzy cross-album lookup in local cache
3. **Spotify** — synced lyrics via Spotify's API (requires `SPOTIFY_SP_DC`) 3. **Spotify** — synced lyrics via Spotify's API (requires `SPOTIFY_SP_DC` and Spotify trackid)
4. **LRCLIB** — exact match from [lrclib.net](https://lrclib.net) (requires full metadata) 4. **LRCLIB** — exact match from [lrclib.net](https://lrclib.net) (requires full metadata)
5. **LRCLIB Search** — fuzzy search from lrclib.net (requires at least a title) 5. **Musixmatch (Spotify)** — Musixmatch API with Spotify trackid (requires `MUSIXMATCH_USERTOKEN` and Spotify trackid)
6. **Netease** — Netease Cloud Music public API 6. **LRCLIB Search** — fuzzy search from lrclib.net (requires at least a title)
7. **QQ Music** QQ Music via self-hosted API proxy (requires `QQ_MUSIC_API_URL` that provides the same interface as [tooplick/qq-music-api](https://github.com/tooplick/qq-music-api)) 7. **Musixmatch** — Musixmatch API with metadata search (requires `MUSIXMATCH_USERTOKEN` and at least a title)
8. **Netease** — Netease Cloud Music public API
9. **QQ Music** — QQ Music via self-hosted API proxy (requires `QQ_MUSIC_API_URL` that provides the same interface as [tooplick/qq-music-api](https://github.com/tooplick/qq-music-api))
> I'm aware that Spotify's lyrics are provided by Musixmatch, but the fact is that Musixmatch's own search will yield different (and more) results than Spotify's, so I treat them as separate sources.
## Usage ## Usage
@@ -28,7 +32,7 @@ See `lrx --help` for full command reference. Common use cases:
lrx fetch lrx fetch
``` ```
using a specific player or source to fetch from: targeting a specific player and a source to fetch from:
```bash ```bash
lrx --player mpd fetch --method lrclib-search lrx --player mpd fetch --method lrclib-search
@@ -41,7 +45,7 @@ See `lrx --help` for full command reference. Common use cases:
lrx search --trackid "5p0ietGkLNEqx1Z7ijkw5g" lrx search --trackid "5p0ietGkLNEqx1Z7ijkw5g"
``` ```
or for a local file: or by path to a local audio file:
```bash ```bash
lrx search --path "/path/to/Westlife - My Love.flac" lrx search --path "/path/to/Westlife - My Love.flac"
@@ -75,11 +79,13 @@ Set credentials via environment variable or `.env` file:
```env ```env
SPOTIFY_SP_DC=your_cookie_value SPOTIFY_SP_DC=your_cookie_value
MUSIXMATCH_USERTOKEN=your_musixmatch_usertoken
QQ_MUSIC_API_URL=https://api.example.com QQ_MUSIC_API_URL=https://api.example.com
PREFERRED_PLAYER=spotify PREFERRED_PLAYER=spotify
``` ```
- `SPOTIFY_SP_DC` — required for Spotify source. Defaults to empty (disabled Spotify source). - `SPOTIFY_SP_DC` — required for Spotify source. Defaults to empty (disabled Spotify source).
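- `MUSIXMATCH_USERTOKEN` — required for Musixmatch sources. Defaults to empty (disabled Musixmatch sources). To obtain it, open the [Curators Settings Page](https://curators.musixmatch.com/settings), log in if required, click "Copy debug info", and take the `UserToken` value from the copied text.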
- `QQ_MUSIC_API_URL` — required for QQ Music source. Defaults to empty (disabled QQ Music source). - `QQ_MUSIC_API_URL` — required for QQ Music source. Defaults to empty (disabled QQ Music source).
- `PREFERRED_PLAYER` — preferred MPRIS player when multiple are active. Defaults to `spotify`. Only used when no `--player` flag is given and more than one player (or none of them) is currently playing. - `PREFERRED_PLAYER` — preferred MPRIS player when multiple are active. Defaults to `spotify`. Only used when no `--player` flag is given and more than one player (or none of them) is currently playing.
+7 -2
View File
@@ -67,7 +67,7 @@ SPOTIFY_SECRET_URL = (
) )
SPOTIFY_SP_DC = os.environ.get("SPOTIFY_SP_DC", "") SPOTIFY_SP_DC = os.environ.get("SPOTIFY_SP_DC", "")
SPOTIFY_TOKEN_CACHE_FILE = os.path.join(CACHE_DIR, "spotify_token.json") SPOTIFY_TOKEN_CACHE_FILE = os.path.join(CACHE_DIR, "spotify_token.json")
SPOTIFY_APP_VERSION = "1.2.87.284.g3ff41c13" SPOTIFY_APP_VERSION = "1.2.88.21.g8e037c8f"
# Netease api # Netease api
NETEASE_SEARCH_URL = "https://music.163.com/api/cloudsearch/pc" NETEASE_SEARCH_URL = "https://music.163.com/api/cloudsearch/pc"
@@ -80,11 +80,16 @@ LRCLIB_SEARCH_URL = "https://lrclib.net/api/search"
# QQ Music API (self-hosted proxy) # QQ Music API (self-hosted proxy)
QQ_MUSIC_API_URL = os.environ.get("QQ_MUSIC_API_URL", "").rstrip("/") QQ_MUSIC_API_URL = os.environ.get("QQ_MUSIC_API_URL", "").rstrip("/")
# Musixmatch desktop API
MUSIXMATCH_USERTOKEN = os.environ.get("MUSIXMATCH_USERTOKEN", "")
MUSIXMATCH_SEARCH_URL = "https://apic-desktop.musixmatch.com/ws/1.1/track.search"
MUSIXMATCH_MACRO_URL = "https://apic-desktop.musixmatch.com/ws/1.1/macro.subtitles.get"
# Player preference (used when multiple MPRIS players are active) # Player preference (used when multiple MPRIS players are active)
PREFERRED_PLAYER = os.environ.get("PREFERRED_PLAYER", "spotify") PREFERRED_PLAYER = os.environ.get("PREFERRED_PLAYER", "spotify")
# User-Agents # User-Agents
UA_BROWSER = "Mozilla/5.0 (X11; Linux x86_64; rv:148.0) Gecko/20100101 Firefox/148.0" UA_BROWSER = "Mozilla/5.0 (X11; Linux x86_64; rv:149.0) Gecko/20100101 Firefox/149.0"
UA_LRX = f"LRX-CLI {APP_VERSION} (https://github.com/Uyanide/lrx-cli)" UA_LRX = f"LRX-CLI {APP_VERSION} (https://github.com/Uyanide/lrx-cli)"
os.makedirs(CACHE_DIR, exist_ok=True) os.makedirs(CACHE_DIR, exist_ok=True)
+8 -2
View File
@@ -13,6 +13,7 @@ from .cache_search import CacheSearchFetcher
from .spotify import SpotifyFetcher from .spotify import SpotifyFetcher
from .lrclib import LrclibFetcher from .lrclib import LrclibFetcher
from .lrclib_search import LrclibSearchFetcher from .lrclib_search import LrclibSearchFetcher
from .musixmatch import MusixmatchFetcher, MusixmatchSpotifyFetcher
from .netease import NeteaseFetcher from .netease import NeteaseFetcher
from .qqmusic import QQMusicFetcher from .qqmusic import QQMusicFetcher
from ..cache import CacheEngine from ..cache import CacheEngine
@@ -23,9 +24,11 @@ FetcherMethodType = Literal[
"cache-search", "cache-search",
"spotify", "spotify",
"lrclib", "lrclib",
"musixmatch-spotify",
"lrclib-search", "lrclib-search",
"netease", "netease",
"qqmusic", "qqmusic",
"musixmatch",
] ]
# Fetchers within a group run in parallel; groups run sequentially. # Fetchers within a group run in parallel; groups run sequentially.
@@ -34,8 +37,9 @@ _FETCHER_GROUPS: list[list[FetcherMethodType]] = [
["local"], ["local"],
["cache-search"], ["cache-search"],
["spotify"], ["spotify"],
["lrclib"], ["lrclib", "musixmatch-spotify"],
["lrclib-search", "netease", "qqmusic"], ["lrclib-search", "musixmatch"],
["netease", "qqmusic"],
] ]
@@ -46,9 +50,11 @@ def create_fetchers(cache: CacheEngine) -> dict[FetcherMethodType, BaseFetcher]:
"cache-search": CacheSearchFetcher(cache), "cache-search": CacheSearchFetcher(cache),
"spotify": SpotifyFetcher(), "spotify": SpotifyFetcher(),
"lrclib": LrclibFetcher(), "lrclib": LrclibFetcher(),
"musixmatch-spotify": MusixmatchSpotifyFetcher(),
"lrclib-search": LrclibSearchFetcher(), "lrclib-search": LrclibSearchFetcher(),
"netease": NeteaseFetcher(), "netease": NeteaseFetcher(),
"qqmusic": QQMusicFetcher(), "qqmusic": QQMusicFetcher(),
"musixmatch": MusixmatchFetcher(),
} }
return fetchers return fetchers
+317
View File
@@ -0,0 +1,317 @@
"""
Author: Uyanide pywang0608@foxmail.com
Date: 2026-04-04 15:28:34
Description: Musixmatch fetchers (desktop API, usertoken auth)
"""
"""
Uses the Musixmatch desktop API (apic-desktop.musixmatch.com).
Requires MUSIXMATCH_USERTOKEN from https://curators.musixmatch.com/settings
"Copy debug info" → find UserToken.
Two fetchers:
musixmatch-spotify — direct lookup by Spotify track ID (exact, no search)
musixmatch — metadata search + multi-candidate fallback
"""
import json
from typing import Optional
from urllib.parse import urlencode
import httpx
from loguru import logger
from .base import BaseFetcher
from .selection import SearchCandidate, select_best
from ..lrc import LRCData
from ..models import CacheStatus, LyricResult, TrackMeta
from ..config import (
HTTP_TIMEOUT,
MUSIXMATCH_MACRO_URL,
MUSIXMATCH_SEARCH_URL,
MUSIXMATCH_USERTOKEN,
TTL_NETWORK_ERROR,
TTL_NOT_FOUND,
)
# Headers sent with every Musixmatch request made by this module; the cookie
# carries an empty x-mxm-token-guid value.
_MXM_HEADERS = {"Cookie": "x-mxm-token-guid="}
# Query parameters shared by all macro.subtitles.get calls. _fetch_macro merges
# the caller's params on top of these, so a caller-supplied key wins on clash.
_MXM_MACRO_BASE_PARAMS: dict[str, str] = {
"format": "json",
"namespace": "lyrics_richsynched",
"subtitle_format": "mxm",
# NOTE(review): presumably this is what makes the response include the
# "track.richsync.get" section read by _fetch_macro — confirm against the API.
"optional_calls": "track.richsync",
"app_id": "web-desktop-app-v1.0",
}
def _format_ts(s: float) -> str:
mm = int(s) // 60
ss = int(s) % 60
cs = min(round((s % 1) * 100), 99)
return f"[{mm:02d}:{ss:02d}.{cs:02d}]"
def _parse_richsync(body: str) -> Optional[str]:
    """Convert a Musixmatch richsync JSON string into LRC text.

    The body is expected to decode to a list of objects, each carrying a
    numeric ``"ts"`` (line start, seconds) and a string ``"x"`` (line text).
    Entries that do not have that shape are skipped.

    Args:
        body: Raw JSON text of the richsync payload.

    Returns:
        Newline-joined ``[mm:ss.cc]text`` lines, or None when the body cannot
        be decoded, is not a list, or yields no usable line.
    """
    try:
        entries = json.loads(body)
        if not isinstance(entries, list):
            return None
        rendered = [
            f"{_format_ts(float(entry['ts']))}{entry['x']}"
            for entry in entries
            if isinstance(entry, dict)
            and isinstance(entry.get("ts"), (int, float))
            and isinstance(entry.get("x"), str)
        ]
    except Exception:
        # Best-effort parser: any decoding or formatting failure means
        # "no lyrics" rather than an error for the caller.
        return None
    return "\n".join(rendered) if rendered else None
def _parse_subtitle(body: str) -> Optional[str]:
    """Convert a Musixmatch subtitle JSON string into LRC text.

    The body is expected to decode to a list of objects shaped like
    ``{"text": str, "time": {"total": number}}`` where ``total`` is the line
    start in seconds. Entries that do not have that shape are skipped.

    Args:
        body: Raw JSON text of the subtitle payload.

    Returns:
        Newline-joined ``[mm:ss.cc]text`` lines, or None when the body cannot
        be decoded, is not a list, or yields no usable line.
    """
    try:
        entries = json.loads(body)
        if not isinstance(entries, list):
            return None
        rendered = []
        for entry in entries:
            if not isinstance(entry, dict):
                continue
            text, timing = entry.get("text"), entry.get("time")
            if isinstance(text, str) and isinstance(timing, dict):
                total = timing.get("total")
                if isinstance(total, (int, float)):
                    rendered.append(f"{_format_ts(float(total))}{text}")
    except Exception:
        # Best-effort parser: any decoding or formatting failure means
        # "no lyrics" rather than an error for the caller.
        return None
    return "\n".join(rendered) if rendered else None
async def _fetch_macro(
    client: httpx.AsyncClient,
    params: dict[str, str],
) -> Optional[LRCData]:
    """Query ``macro.subtitles.get`` and extract synced lyrics from the reply.

    ``params`` is layered over ``_MXM_MACRO_BASE_PARAMS`` to form the query
    string. The richsync section of the response is tried first; the subtitle
    section is used only when richsync yields nothing.

    Args:
        client: Open HTTP client used to issue the GET request.
        params: Call-specific query parameters (track identifier, usertoken).

    Returns:
        The parsed lyrics, or None when the response holds no usable lyrics.

    Raises:
        Exception: Whatever the HTTP client raises for transport failures or
            error statuses (via ``raise_for_status``) is propagated.
    """
    query = _MXM_MACRO_BASE_PARAMS | params
    url = f"{MUSIXMATCH_MACRO_URL}?{urlencode(query)}"
    # Only the parameter names are logged; the values include the usertoken.
    logger.debug(f"Musixmatch: macro call with {list(params.keys())}")

    response = await client.get(url, headers=_MXM_HEADERS)
    response.raise_for_status()
    payload = response.json()

    # Musixmatch returns body=[] (not {}) when the track is not found
    body = payload.get("message", {}).get("body", {})
    macro_calls = body.get("macro_calls", {}) if isinstance(body, dict) else None
    if not isinstance(macro_calls, dict):
        return None

    # First choice: richsync (word-level timing)
    rich_section = macro_calls.get("track.richsync.get", {})
    rich_msg = rich_section.get("message", {})
    if isinstance(rich_msg, dict):
        if rich_msg.get("header", {}).get("status_code") == 200:
            rich_payload = rich_msg.get("body", {}).get("richsync", {})
            raw = rich_payload.get("richsync_body")
            text = _parse_richsync(raw) if isinstance(raw, str) else None
            parsed = LRCData(text) if text else None
            if parsed:
                logger.debug("Musixmatch: got richsync lyrics")
                return parsed

    # Second choice: subtitle (line-level timing)
    sub_section = macro_calls.get("track.subtitles.get", {})
    sub_msg = sub_section.get("message", {})
    if isinstance(sub_msg, dict):
        if sub_msg.get("header", {}).get("status_code") == 200:
            subtitles = sub_msg.get("body", {}).get("subtitle_list", [])
            if isinstance(subtitles, list) and subtitles:
                raw = subtitles[0].get("subtitle", {}).get("subtitle_body")
                text = _parse_subtitle(raw) if isinstance(raw, str) else None
                parsed = LRCData(text) if text else None
                if parsed:
                    logger.debug("Musixmatch: got subtitle lyrics")
                    return parsed

    logger.debug("Musixmatch: no usable lyrics in macro response")
    return None
class MusixmatchSpotifyFetcher(BaseFetcher):
    """Fetcher that asks Musixmatch for lyrics by Spotify track ID.

    A single ``macro.subtitles.get`` request is issued per fetch; there is no
    search step.
    """

    @property
    def source_name(self) -> str:
        """Identifier stored on results produced by this fetcher."""
        return "musixmatch-spotify"

    def is_available(self, track: TrackMeta) -> bool:
        """Return True when the track has an ID and a user token is configured."""
        return bool(track.trackid and MUSIXMATCH_USERTOKEN)

    async def fetch(
        self, track: TrackMeta, bypass_cache: bool = False
    ) -> Optional[LyricResult]:
        """Look up lyrics for ``track`` by its track ID.

        Args:
            track: Track to fetch lyrics for; its ``trackid`` is sent as the
                Spotify ID.
            bypass_cache: Not used by this fetcher.

        Returns:
            A synced result on success, a NOT_FOUND result when the response
            holds no usable lyrics, or a NETWORK_ERROR result when the request
            raises.
        """
        logger.info(f"Musixmatch-Spotify: fetching lyrics for {track.display_name()}")

        lookup = {
            "track_spotify_id": track.trackid,  # type: ignore[dict-item]
            "usertoken": MUSIXMATCH_USERTOKEN,
        }
        try:
            async with httpx.AsyncClient(timeout=HTTP_TIMEOUT) as client:
                lrc = await _fetch_macro(client, lookup)
        except Exception as e:
            logger.error(f"Musixmatch-Spotify: fetch failed: {e}")
            return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)

        if lrc is not None:
            logger.info(
                f"Musixmatch-Spotify: got SUCCESS_SYNCED lyrics ({len(lrc)} lines)"
            )
            return LyricResult(
                status=CacheStatus.SUCCESS_SYNCED,
                lyrics=lrc,
                source=self.source_name,
            )

        logger.debug(f"Musixmatch-Spotify: no lyrics found for {track.display_name()}")
        return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
class MusixmatchFetcher(BaseFetcher):
    """Fetch lyrics from Musixmatch via a metadata search.

    The fetch runs in two steps: ``track.search`` is queried with the track's
    title (plus artist/album when known) and the best candidate is chosen with
    ``select_best``; that candidate's ``commontrack_id`` is then passed to
    ``macro.subtitles.get`` to download the lyrics.
    """

    @property
    def source_name(self) -> str:
        """Identifier stored on results produced by this fetcher."""
        return "musixmatch"

    def is_available(self, track: TrackMeta) -> bool:
        """Return True when the track has a title and a user token is configured."""
        return bool(track.title) and bool(MUSIXMATCH_USERTOKEN)

    @staticmethod
    def _to_candidate(item: object) -> Optional[SearchCandidate]:
        """Turn one ``track_list`` entry into a SearchCandidate.

        Returns None for entries that are malformed, lack an integer
        ``commontrack_id``, or are flagged as instrumental.
        """
        if not isinstance(item, dict):
            return None
        t = item.get("track", {})
        if not isinstance(t, dict):
            return None
        commontrack_id = t.get("commontrack_id")
        if not isinstance(commontrack_id, int) or t.get("instrumental"):
            return None

        length = t.get("track_length")
        try:
            duration_ms = float(length) * 1000 if length else None
        except (TypeError, ValueError):
            # An unparsable length should not discard the candidate (or the
            # whole search); treat the duration as unknown instead.
            duration_ms = None

        return SearchCandidate(
            item=int(commontrack_id),
            duration_ms=duration_ms,
            is_synced=bool(t.get("has_subtitles") or t.get("has_richsync")),
            title=t.get("track_name"),
            artist=t.get("artist_name"),
            album=t.get("album_name"),
        )

    async def _search(self, track: TrackMeta) -> tuple[Optional[int], float]:
        """Search Musixmatch by metadata and pick the best candidate.

        Args:
            track: Track whose title/artist/album form the search query.

        Returns:
            ``(commontrack_id, confidence)`` for the selected candidate, or
            ``(None, 0.0)`` when the search yields no results.

        Raises:
            Exception: Transport failures, error statuses (via
                ``raise_for_status``) and JSON decoding errors are propagated
                so the caller can tell "request failed" from "no match".
        """
        params: dict[str, str] = {
            "format": "json",
            "app_id": "web-desktop-app-v1.0",
            "q_track": track.title or "",
            "usertoken": MUSIXMATCH_USERTOKEN,
            "page_size": "10",
            "f_has_lyrics": "1",
        }
        if track.artist:
            params["q_artist"] = track.artist
        if track.album:
            params["q_album"] = track.album

        url = f"{MUSIXMATCH_SEARCH_URL}?{urlencode(params)}"
        logger.debug(f"Musixmatch: searching for '{track.display_name()}'")

        async with httpx.AsyncClient(timeout=HTTP_TIMEOUT) as client:
            resp = await client.get(url, headers=_MXM_HEADERS)
            resp.raise_for_status()
            data = resp.json()

        # Musixmatch may send a non-dict body (e.g. []) instead of {} when
        # there is nothing to return, so every level is type-checked rather
        # than chained with .get(); a plain "no results" must not raise.
        message = data.get("message") if isinstance(data, dict) else None
        body = message.get("body") if isinstance(message, dict) else None
        track_list = body.get("track_list") if isinstance(body, dict) else None
        if not isinstance(track_list, list) or not track_list:
            logger.debug("Musixmatch: search returned 0 results")
            return None, 0.0

        logger.debug(f"Musixmatch: search returned {len(track_list)} candidates")

        candidates = [
            candidate
            for item in track_list
            if (candidate := self._to_candidate(item)) is not None
        ]

        best_id, confidence = select_best(
            candidates,
            track.length,
            title=track.title,
            artist=track.artist,
            album=track.album,
        )
        if best_id is not None:
            logger.debug(
                f"Musixmatch: best candidate id={best_id} ({confidence:.0f})"
            )
        else:
            logger.debug("Musixmatch: no suitable candidate found")
        return best_id, confidence

    async def fetch(
        self, track: TrackMeta, bypass_cache: bool = False
    ) -> Optional[LyricResult]:
        """Search for ``track`` and download lyrics for the best match.

        Args:
            track: Track to fetch lyrics for.
            bypass_cache: Not used by this fetcher.

        Returns:
            A synced result (with the search confidence) on success, a
            NOT_FOUND result when no candidate or no lyrics are found, or a
            NETWORK_ERROR result when either request raises.
        """
        logger.info(f"Musixmatch: fetching lyrics for {track.display_name()}")

        try:
            commontrack_id, confidence = await self._search(track)
        except Exception as e:
            # A failed search request is not evidence that the track has no
            # lyrics; report it the same way as a failed lookup below.
            logger.error(f"Musixmatch: search failed: {e}")
            return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)

        if commontrack_id is None:
            logger.debug(f"Musixmatch: no match found for {track.display_name()}")
            return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)

        try:
            async with httpx.AsyncClient(timeout=HTTP_TIMEOUT) as client:
                lrc = await _fetch_macro(
                    client,
                    {
                        "commontrack_id": str(commontrack_id),
                        "usertoken": MUSIXMATCH_USERTOKEN,
                    },
                )
        except Exception as e:
            logger.error(f"Musixmatch: fetch failed: {e}")
            return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)

        if lrc is None:
            logger.debug(f"Musixmatch: no lyrics for commontrack_id={commontrack_id}")
            return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)

        logger.info(
            f"Musixmatch: got SUCCESS_SYNCED lyrics "
            f"for commontrack_id={commontrack_id} ({len(lrc)} lines)"
        )
        return LyricResult(
            status=CacheStatus.SUCCESS_SYNCED,
            lyrics=lrc,
            source=self.source_name,
            confidence=confidence,
        )
+1 -1
View File
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
[project] [project]
name = "lrx-cli" name = "lrx-cli"
version = "0.4.4" version = "0.4.5"
description = "Fetch line-synced lyrics for your music player." description = "Fetch line-synced lyrics for your music player."
readme = "README.md" readme = "README.md"
requires-python = ">=3.13" requires-python = ">=3.13"
Generated
+1 -1
View File
@@ -153,7 +153,7 @@ wheels = [
[[package]] [[package]]
name = "lrx-cli" name = "lrx-cli"
version = "0.4.3" version = "0.4.4"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
{ name = "cyclopts" }, { name = "cyclopts" },