Files
lrx-cli/lrx_cli/fetchers/lrclib_search.py
T

182 lines
6.5 KiB
Python

"""
Author: Uyanide pywang0608@foxmail.com
Date: 2026-03-25 05:30:50
Description: LRCLIB search fetcher — fuzzy search via lrclib.net /api/search
"""
"""
Used when metadata is incomplete (no album or duration) but title is available.
Selects the best match by duration when track length is known.
"""
import httpx
from typing import Optional
from loguru import logger
from urllib.parse import urlencode
from .base import BaseFetcher
from .selection import SearchCandidate, select_best
from ..models import TrackMeta, LyricResult, CacheStatus
from ..lrc import LRCData
from ..config import (
HTTP_TIMEOUT,
TTL_UNSYNCED,
TTL_NOT_FOUND,
TTL_NETWORK_ERROR,
LRCLIB_SEARCH_URL,
UA_LRX,
)
class LrclibSearchFetcher(BaseFetcher):
@property
def source_name(self) -> str:
return "lrclib-search"
def is_available(self, track: TrackMeta) -> bool:
return bool(track.title)
def _build_queries(self, track: TrackMeta) -> list[dict[str, str]]:
"""Build up to 4 query param sets, from most specific to least.
1. title + artist + album (if all present)
2. title + artist (if artist present)
3. title + album (if album present)
4. title only
"""
assert track.title is not None
title = track.title
queries: list[dict[str, str]] = []
if track.artist and track.album:
queries.append(
{
"track_name": title,
"artist_name": track.artist,
"album_name": track.album,
}
)
if track.artist:
queries.append({"track_name": title, "artist_name": track.artist})
if track.album:
queries.append({"track_name": title, "album_name": track.album})
queries.append({"track_name": title})
return queries
def fetch(
self, track: TrackMeta, bypass_cache: bool = False
) -> Optional[LyricResult]:
"""Search LRCLIB for lyrics. Requires at least a title."""
if not track.title:
logger.debug("LRCLIB-search: skipped — no title")
return None
queries = self._build_queries(track)
logger.info(f"LRCLIB-search: searching for {track.display_name()}")
seen_ids: set[int] = set()
candidates: list[dict] = []
had_error = False
try:
with httpx.Client(timeout=HTTP_TIMEOUT) as client:
for params in queries:
url = f"{LRCLIB_SEARCH_URL}?{urlencode(params)}"
logger.debug(f"LRCLIB-search: query {params}")
resp = client.get(url, headers={"User-Agent": UA_LRX})
if resp.status_code != 200:
logger.error(f"LRCLIB-search: API returned {resp.status_code}")
had_error = True
continue
data = resp.json()
if not isinstance(data, list):
continue
for item in data:
if not isinstance(item, dict):
continue
item_id = item.get("id")
if item_id is not None and item_id in seen_ids:
continue
if item_id is not None:
seen_ids.add(item_id)
candidates.append(item)
if not candidates:
if had_error:
return LyricResult(
status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR
)
logger.debug(f"LRCLIB-search: no results for {track.display_name()}")
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
logger.debug(
f"LRCLIB-search: got {len(candidates)} unique candidates "
f"from {len(queries)} queries"
)
mapped = [
SearchCandidate(
item=item,
duration_ms=item["duration"] * 1000
if isinstance(item.get("duration"), (int, float))
else None,
is_synced=isinstance(item.get("syncedLyrics"), str)
and bool(item["syncedLyrics"].strip()),
title=item.get("trackName"),
artist=item.get("artistName"),
album=item.get("albumName"),
)
for item in candidates
]
best, confidence = select_best(
mapped,
track.length,
title=track.title,
artist=track.artist,
album=track.album,
)
if best is None:
logger.debug("LRCLIB-search: no valid candidate found")
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
synced = best.get("syncedLyrics")
unsynced = best.get("plainLyrics")
if isinstance(synced, str) and synced.strip():
lyrics = LRCData(synced)
logger.info(
f"LRCLIB-search: got synced lyrics ({len(lyrics)} lines, confidence={confidence:.0f})"
)
return LyricResult(
status=CacheStatus.SUCCESS_SYNCED,
lyrics=lyrics,
source=self.source_name,
confidence=confidence,
)
elif isinstance(unsynced, str) and unsynced.strip():
lyrics = LRCData(unsynced)
logger.info(
f"LRCLIB-search: got unsynced lyrics ({len(lyrics)} lines, confidence={confidence:.0f})"
)
return LyricResult(
status=CacheStatus.SUCCESS_UNSYNCED,
lyrics=lyrics,
source=self.source_name,
ttl=TTL_UNSYNCED,
confidence=confidence,
)
else:
logger.debug("LRCLIB-search: best candidate has empty lyrics")
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
except httpx.HTTPError as e:
logger.error(f"LRCLIB-search: HTTP error: {e}")
return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR)
except Exception as e:
logger.error(f"LRCLIB-search: unexpected error: {e}")
return None