From 2f8004581bd7e8ff2099c57bb0ea45c5d53665c6 Mon Sep 17 00:00:00 2001 From: Uyanide Date: Fri, 3 Apr 2026 22:16:49 +0200 Subject: [PATCH] feat: fetcher: try multiple candidates rather than just the best one --- lrx_cli/config.py | 4 ++ lrx_cli/fetchers/__init__.py | 2 +- lrx_cli/fetchers/netease.py | 49 ++++++++++++-------- lrx_cli/fetchers/qqmusic.py | 47 +++++++++++-------- lrx_cli/fetchers/selection.py | 27 +++++++++++ pyproject.toml | 2 +- tests/test_selection.py | 86 +++++++++++++++++++++++++++++++++-- uv.lock | 2 +- 8 files changed, 176 insertions(+), 43 deletions(-) diff --git a/lrx_cli/config.py b/lrx_cli/config.py index f5d62f2..03a5c61 100644 --- a/lrx_cli/config.py +++ b/lrx_cli/config.py @@ -49,6 +49,10 @@ SCORE_W_SYNCED = 10.0 MIN_CONFIDENCE = 25.0 # below this, candidate is rejected HIGH_CONFIDENCE = 80.0 # at or above this, stop searching early +# Multi-candidate fetching +MULTI_CANDIDATE_LIMIT = 3 # max candidates to try per search-based fetcher +MULTI_CANDIDATE_DELAY_S = 0.2 # delay between sequential lyric fetches + # Legacy cache rows (no confidence stored) get a base score by sync status LEGACY_CONFIDENCE_SYNCED = 50.0 LEGACY_CONFIDENCE_UNSYNCED = 40.0 diff --git a/lrx_cli/fetchers/__init__.py b/lrx_cli/fetchers/__init__.py index bd99afa..9dd199e 100644 --- a/lrx_cli/fetchers/__init__.py +++ b/lrx_cli/fetchers/__init__.py @@ -29,7 +29,7 @@ FetcherMethodType = Literal[ ] # Fetchers within a group run in parallel; groups run sequentially. -# A group that produces any positive result stops the pipeline. +# A group that produces any trusted and synced result stops the pipeline. 
_FETCHER_GROUPS: list[list[FetcherMethodType]] = [ ["local"], ["cache-search"], diff --git a/lrx_cli/fetchers/netease.py b/lrx_cli/fetchers/netease.py index adc182a..575b3e1 100644 --- a/lrx_cli/fetchers/netease.py +++ b/lrx_cli/fetchers/netease.py @@ -12,18 +12,20 @@ Search results are filtered by duration when the track has a known length to avoid returning lyrics for the wrong version of a song. """ +import asyncio from typing import Optional import httpx from loguru import logger from .base import BaseFetcher -from .selection import SearchCandidate, select_best +from .selection import SearchCandidate, select_ranked from ..models import TrackMeta, LyricResult, CacheStatus from ..lrc import LRCData from ..config import ( HTTP_TIMEOUT, TTL_NOT_FOUND, TTL_NETWORK_ERROR, + MULTI_CANDIDATE_DELAY_S, NETEASE_SEARCH_URL, NETEASE_LYRIC_URL, UA_BROWSER, @@ -45,10 +47,10 @@ class NeteaseFetcher(BaseFetcher): async def _search( self, track: TrackMeta, limit: int = 10 - ) -> tuple[Optional[int], float]: + ) -> list[tuple[int, float]]: query = f"{track.artist or ''} {track.title or ''}".strip() if not query: - return None, 0.0 + return [] logger.debug(f"Netease: searching for '{query}' (limit={limit})") @@ -66,23 +68,23 @@ class NeteaseFetcher(BaseFetcher): logger.error( f"Netease: search returned non-dict: {type(result).__name__}" ) - return None, 0.0 + return [] result_body = result.get("result") if not isinstance(result_body, dict): logger.debug("Netease: search 'result' field missing or invalid") - return None, 0.0 + return [] songs = result_body.get("songs") if not isinstance(songs, list) or len(songs) == 0: logger.debug("Netease: search returned 0 results") - return None, 0.0 + return [] logger.debug(f"Netease: search returned {len(songs)} candidates") candidates = [ SearchCandidate( - item=song.get("id"), + item=song_id, duration_ms=float(song["dt"]) if isinstance(song.get("dt"), int) else None, @@ -92,27 +94,27 @@ class NeteaseFetcher(BaseFetcher): 
album=(song.get("al") or {}).get("name"), ) for song in songs - if isinstance(song, dict) and song.get("id") is not None + if isinstance(song, dict) and isinstance(song_id := song.get("id"), int) ] - best_id, confidence = select_best( + ranked = select_ranked( candidates, track.length, title=track.title, artist=track.artist, album=track.album, ) - if best_id is not None: + if ranked: logger.debug( - f"Netease: selected id={best_id} (confidence={confidence:.0f})" + "Netease: top candidates: " + + ", ".join(f"id={i} ({c:.0f})" for i, c in ranked) ) - return best_id, confidence - - logger.debug("Netease: no suitable candidate found") - return None, 0.0 + else: + logger.debug("Netease: no suitable candidate found") + return ranked except Exception as e: logger.error(f"Netease: search failed: {e}") - return None, 0.0 + return [] async def _get_lyric( self, song_id: int, confidence: float = 0.0 @@ -185,9 +187,18 @@ class NeteaseFetcher(BaseFetcher): return None logger.info(f"Netease: fetching lyrics for {track.display_name()}") - song_id, confidence = await self._search(track) - if not song_id: + candidates = await self._search(track) + if not candidates: logger.debug(f"Netease: no match found for {track.display_name()}") return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND) - return await self._get_lyric(song_id, confidence=confidence) + for i, (song_id, confidence) in enumerate(candidates): + if i > 0: + await asyncio.sleep(MULTI_CANDIDATE_DELAY_S) + result = await self._get_lyric(song_id, confidence=confidence) + if result is None or result.status == CacheStatus.NETWORK_ERROR: + return result + if result.status != CacheStatus.NOT_FOUND: + return result + + return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND) diff --git a/lrx_cli/fetchers/qqmusic.py b/lrx_cli/fetchers/qqmusic.py index 21dbe04..d160c82 100644 --- a/lrx_cli/fetchers/qqmusic.py +++ b/lrx_cli/fetchers/qqmusic.py @@ -11,18 +11,20 @@ The base URL is read from the QQ_MUSIC_API_URL 
environment variable. Search → pick best match by duration → fetch LRC lyrics. """ +import asyncio from typing import Optional import httpx from loguru import logger from .base import BaseFetcher -from .selection import SearchCandidate, select_best +from .selection import SearchCandidate, select_ranked from ..models import TrackMeta, LyricResult, CacheStatus from ..lrc import LRCData from ..config import ( HTTP_TIMEOUT, TTL_NOT_FOUND, TTL_NETWORK_ERROR, + MULTI_CANDIDATE_DELAY_S, QQ_MUSIC_API_URL, ) @@ -37,10 +39,10 @@ class QQMusicFetcher(BaseFetcher): async def _search( self, track: TrackMeta, limit: int = 10 - ) -> tuple[Optional[str], float]: + ) -> list[tuple[str, float]]: query = f"{track.artist or ''} {track.title or ''}".strip() if not query: - return None, 0.0 + return [] logger.debug(f"QQMusic: searching for '{query}' (limit={limit})") @@ -55,18 +57,18 @@ class QQMusicFetcher(BaseFetcher): if data.get("code") != 0: logger.error(f"QQMusic: search API error: {data}") - return None, 0.0 + return [] songs = data.get("data", {}).get("list", []) if not songs: logger.debug("QQMusic: search returned 0 results") - return None, 0.0 + return [] logger.debug(f"QQMusic: search returned {len(songs)} candidates") candidates = [ SearchCandidate( - item=song.get("mid"), + item=mid, duration_ms=float(song["interval"]) * 1000 if isinstance(song.get("interval"), int) else None, @@ -76,27 +78,27 @@ class QQMusicFetcher(BaseFetcher): album=(song.get("album") or {}).get("name"), ) for song in songs - if isinstance(song, dict) and song.get("mid") is not None + if isinstance(song, dict) and isinstance(mid := song.get("mid"), str) ] - best_mid, confidence = select_best( + ranked = select_ranked( candidates, track.length, title=track.title, artist=track.artist, album=track.album, ) - if best_mid is not None: + if ranked: logger.debug( - f"QQMusic: selected mid={best_mid} (confidence={confidence:.0f})" + "QQMusic: top candidates: " + + ", ".join(f"mid={m} ({c:.0f})" for m, c in 
ranked) ) - return best_mid, confidence - - logger.debug("QQMusic: no suitable candidate found") - return None, 0.0 + else: + logger.debug("QQMusic: no suitable candidate found") + return ranked except Exception as e: logger.error(f"QQMusic: search failed: {e}") - return None, 0.0 + return [] async def _get_lyric( self, mid: str, confidence: float = 0.0 @@ -152,9 +154,18 @@ class QQMusicFetcher(BaseFetcher): return None logger.info(f"QQMusic: fetching lyrics for {track.display_name()}") - mid, confidence = await self._search(track) - if not mid: + candidates = await self._search(track) + if not candidates: logger.debug(f"QQMusic: no match found for {track.display_name()}") return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND) - return await self._get_lyric(mid, confidence=confidence) + for i, (mid, confidence) in enumerate(candidates): + if i > 0: + await asyncio.sleep(MULTI_CANDIDATE_DELAY_S) + result = await self._get_lyric(mid, confidence=confidence) + if result is None or result.status == CacheStatus.NETWORK_ERROR: + return result + if result.status != CacheStatus.NOT_FOUND: + return result + + return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND) diff --git a/lrx_cli/fetchers/selection.py b/lrx_cli/fetchers/selection.py index 7ed99a5..92c416c 100644 --- a/lrx_cli/fetchers/selection.py +++ b/lrx_cli/fetchers/selection.py @@ -11,6 +11,7 @@ from typing import Generic, Optional, TypeVar from ..config import ( DURATION_TOLERANCE_MS, + MULTI_CANDIDATE_LIMIT, SCORE_W_TITLE as _W_TITLE, SCORE_W_ARTIST as _W_ARTIST, SCORE_W_ALBUM as _W_ALBUM, @@ -143,6 +144,32 @@ def _score_candidate( return metadata_score + synced_score +def select_ranked( + candidates: list[SearchCandidate[T]], + track_length_ms: Optional[int] = None, + *, + title: Optional[str] = None, + artist: Optional[str] = None, + album: Optional[str] = None, + min_confidence: float = MIN_CONFIDENCE, + max_results: int = MULTI_CANDIDATE_LIMIT, +) -> list[tuple[T, float]]: + """Score 
candidates and return top max_results above min_confidence, sorted by score descending.""" + scored: list[tuple[T, float]] = [] + for c in candidates: + if ( + track_length_ms is not None + and c.duration_ms is not None + and abs(c.duration_ms - track_length_ms) > DURATION_TOLERANCE_MS + ): + continue + s = _score_candidate(c, title, artist, album, track_length_ms) + if s >= min_confidence: + scored.append((c.item, s)) + scored.sort(key=lambda x: x[1], reverse=True) + return scored[:max_results] + + def select_best( candidates: list[SearchCandidate[T]], track_length_ms: Optional[int] = None, diff --git a/pyproject.toml b/pyproject.toml index f781708..b0a2b93 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "lrx-cli" -version = "0.4.1" +version = "0.4.2" description = "Fetch line-synced lyrics for your music player." readme = "README.md" requires-python = ">=3.13" diff --git a/tests/test_selection.py b/tests/test_selection.py index 90511b8..a2ec56f 100644 --- a/tests/test_selection.py +++ b/tests/test_selection.py @@ -3,6 +3,7 @@ from __future__ import annotations from lrx_cli.fetchers.selection import ( SearchCandidate, select_best, + select_ranked, _score_candidate, _text_similarity, MIN_CONFIDENCE, @@ -407,9 +408,6 @@ def test_netease_without_ref_metadata_rejects_below_confidence() -> None: assert best is None -# --- Edge cases --- - - def test_empty_candidates_returns_none() -> None: assert select_best([], track_length_ms=5000) == (None, 0.0) assert select_best([], track_length_ms=None) == (None, 0.0) @@ -445,3 +443,85 @@ def test_generic_type_preserved() -> None: dict_candidates = [SearchCandidate(item={"id": 1}, title="x")] best, _ = select_best(dict_candidates, title="x") assert best == {"id": 1} + + +def test_select_ranked_empty_input() -> None: + assert select_ranked([]) == [] + + +def test_select_ranked_all_below_confidence() -> None: + """All candidates below threshold → empty list.""" + 
candidates = [ + SearchCandidate(item="x", title="Completely Different", duration_ms=999999.0) + ] + result = select_ranked( + candidates, 232000, title="My Love", artist="Westlife", min_confidence=90.0 + ) + assert result == [] + + +def test_select_ranked_sorted_descending() -> None: + """Results are ordered highest score first.""" + candidates = _netease_candidates() + ranked = select_ranked( + candidates, + _REF_LENGTH, + title=_REF_TITLE, + artist=_REF_ARTIST, + album=_REF_ALBUM, + ) + assert len(ranked) >= 2 + scores = [score for _, score in ranked] + assert scores == sorted(scores, reverse=True) + + +def test_select_ranked_respects_max_results() -> None: + candidates = _netease_candidates() + ranked = select_ranked( + candidates, + _REF_LENGTH, + title=_REF_TITLE, + artist=_REF_ARTIST, + album=_REF_ALBUM, + max_results=2, + ) + assert len(ranked) <= 2 + + +def test_select_ranked_consistent_with_select_best() -> None: + """First result of select_ranked matches select_best.""" + candidates = _netease_candidates() + kwargs = dict(title=_REF_TITLE, artist=_REF_ARTIST, album=_REF_ALBUM) + ranked = select_ranked(candidates, _REF_LENGTH, **kwargs) # type: ignore + best_item, best_score = select_best(candidates, _REF_LENGTH, **kwargs) # type: ignore + assert ranked[0] == (best_item, best_score) + + +def test_select_ranked_duration_hard_filter_applies() -> None: + """Candidates outside duration tolerance are excluded from ranked results.""" + candidates = _netease_candidates() + ranked = select_ranked( + candidates, + _REF_LENGTH, + title=_REF_TITLE, + artist=_REF_ARTIST, + album=_REF_ALBUM, + ) + ids = [item for item, _ in ranked] + # 29809886 (dt=262000, diff=30000ms) and 20707713 (dt=241116, diff=9116ms) + # both exceed DURATION_TOLERANCE_MS=3000 → must not appear + assert 29809886 not in ids + assert 20707713 not in ids + + +def test_select_ranked_netease_top_is_best_duration_match() -> None: + """2080607 (diff=59ms) should rank first over 572412968 
(diff=1000ms).""" + candidates = _netease_candidates() + ranked = select_ranked( + candidates, + _REF_LENGTH, + title=_REF_TITLE, + artist=_REF_ARTIST, + album=_REF_ALBUM, + ) + assert ranked[0][0] == 2080607 diff --git a/uv.lock b/uv.lock index 6f37c3f..1a6cd93 100644 --- a/uv.lock +++ b/uv.lock @@ -153,7 +153,7 @@ wheels = [ [[package]] name = "lrx-cli" -version = "0.4.0" +version = "0.4.1" source = { editable = "." } dependencies = [ { name = "cyclopts" },