From 2f8004581bd7e8ff2099c57bb0ea45c5d53665c6 Mon Sep 17 00:00:00 2001
From: Uyanide <pywang0608@foxmail.com>
Date: Fri, 3 Apr 2026 22:16:49 +0200
Subject: [PATCH] feat: fetcher: try multiple candidates rather than just the
 best one

---
 lrx_cli/config.py             |  4 ++
 lrx_cli/fetchers/__init__.py  |  2 +-
 lrx_cli/fetchers/netease.py   | 49 ++++++++++++--------
 lrx_cli/fetchers/qqmusic.py   | 47 +++++++++++--------
 lrx_cli/fetchers/selection.py | 27 +++++++++++
 pyproject.toml                |  2 +-
 tests/test_selection.py       | 86 +++++++++++++++++++++++++++++++++--
 uv.lock                       |  2 +-
 8 files changed, 176 insertions(+), 43 deletions(-)

diff --git a/lrx_cli/config.py b/lrx_cli/config.py
index f5d62f2..03a5c61 100644
--- a/lrx_cli/config.py
+++ b/lrx_cli/config.py
@@ -49,6 +49,10 @@ SCORE_W_SYNCED = 10.0
 MIN_CONFIDENCE = 25.0  # below this, candidate is rejected
 HIGH_CONFIDENCE = 80.0  # at or above this, stop searching early
 
+# Multi-candidate fetching
+MULTI_CANDIDATE_LIMIT = 3  # max candidates to try per search-based fetcher
+MULTI_CANDIDATE_DELAY_S = 0.2  # delay between sequential lyric fetches
+
 # Legacy cache rows (no confidence stored) get a base score by sync status
 LEGACY_CONFIDENCE_SYNCED = 50.0
 LEGACY_CONFIDENCE_UNSYNCED = 40.0
diff --git a/lrx_cli/fetchers/__init__.py b/lrx_cli/fetchers/__init__.py
index bd99afa..9dd199e 100644
--- a/lrx_cli/fetchers/__init__.py
+++ b/lrx_cli/fetchers/__init__.py
@@ -29,7 +29,7 @@ FetcherMethodType = Literal[
 ]
 
 # Fetchers within a group run in parallel; groups run sequentially.
-# A group that produces any positive result stops the pipeline.
+# A group that produces any trusted and synced result stops the pipeline.
 _FETCHER_GROUPS: list[list[FetcherMethodType]] = [
     ["local"],
     ["cache-search"],
diff --git a/lrx_cli/fetchers/netease.py b/lrx_cli/fetchers/netease.py
index adc182a..575b3e1 100644
--- a/lrx_cli/fetchers/netease.py
+++ b/lrx_cli/fetchers/netease.py
@@ -12,18 +12,20 @@ Search results are filtered by duration when the track has a known length
 to avoid returning lyrics for the wrong version of a song.
 """
 
+import asyncio
 from typing import Optional
 import httpx
 from loguru import logger
 
 from .base import BaseFetcher
-from .selection import SearchCandidate, select_best
+from .selection import SearchCandidate, select_ranked
 from ..models import TrackMeta, LyricResult, CacheStatus
 from ..lrc import LRCData
 from ..config import (
     HTTP_TIMEOUT,
     TTL_NOT_FOUND,
     TTL_NETWORK_ERROR,
+    MULTI_CANDIDATE_DELAY_S,
     NETEASE_SEARCH_URL,
     NETEASE_LYRIC_URL,
     UA_BROWSER,
@@ -45,10 +47,10 @@ class NeteaseFetcher(BaseFetcher):
 
     async def _search(
         self, track: TrackMeta, limit: int = 10
-    ) -> tuple[Optional[int], float]:
+    ) -> list[tuple[int, float]]:
         query = f"{track.artist or ''} {track.title or ''}".strip()
         if not query:
-            return None, 0.0
+            return []
 
         logger.debug(f"Netease: searching for '{query}' (limit={limit})")
 
@@ -66,23 +68,23 @@ class NeteaseFetcher(BaseFetcher):
                 logger.error(
                     f"Netease: search returned non-dict: {type(result).__name__}"
                 )
-                return None, 0.0
+                return []
 
             result_body = result.get("result")
             if not isinstance(result_body, dict):
                 logger.debug("Netease: search 'result' field missing or invalid")
-                return None, 0.0
+                return []
 
             songs = result_body.get("songs")
             if not isinstance(songs, list) or len(songs) == 0:
                 logger.debug("Netease: search returned 0 results")
-                return None, 0.0
+                return []
 
             logger.debug(f"Netease: search returned {len(songs)} candidates")
 
             candidates = [
                 SearchCandidate(
-                    item=song.get("id"),
+                    item=song_id,
                     duration_ms=float(song["dt"])
                     if isinstance(song.get("dt"), int)
                     else None,
@@ -92,27 +94,27 @@ class NeteaseFetcher(BaseFetcher):
                     album=(song.get("al") or {}).get("name"),
                 )
                 for song in songs
-                if isinstance(song, dict) and song.get("id") is not None
+                if isinstance(song, dict) and isinstance(song_id := song.get("id"), int)
             ]
-            best_id, confidence = select_best(
+            ranked = select_ranked(
                 candidates,
                 track.length,
                 title=track.title,
                 artist=track.artist,
                 album=track.album,
             )
-            if best_id is not None:
+            if ranked:
                 logger.debug(
-                    f"Netease: selected id={best_id} (confidence={confidence:.0f})"
+                    "Netease: top candidates: "
+                    + ", ".join(f"id={i} ({c:.0f})" for i, c in ranked)
                 )
-                return best_id, confidence
-
-            logger.debug("Netease: no suitable candidate found")
-            return None, 0.0
+            else:
+                logger.debug("Netease: no suitable candidate found")
+            return ranked
 
         except Exception as e:
             logger.error(f"Netease: search failed: {e}")
-            return None, 0.0
+            return []
 
     async def _get_lyric(
         self, song_id: int, confidence: float = 0.0
@@ -185,9 +187,18 @@ class NeteaseFetcher(BaseFetcher):
             return None
 
         logger.info(f"Netease: fetching lyrics for {track.display_name()}")
-        song_id, confidence = await self._search(track)
-        if not song_id:
+        candidates = await self._search(track)
+        if not candidates:
             logger.debug(f"Netease: no match found for {track.display_name()}")
             return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
 
-        return await self._get_lyric(song_id, confidence=confidence)
+        for i, (song_id, confidence) in enumerate(candidates):
+            if i > 0:
+                await asyncio.sleep(MULTI_CANDIDATE_DELAY_S)
+            result = await self._get_lyric(song_id, confidence=confidence)
+            if result is None or result.status == CacheStatus.NETWORK_ERROR:
+                return result
+            if result.status != CacheStatus.NOT_FOUND:
+                return result
+
+        return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
diff --git a/lrx_cli/fetchers/qqmusic.py b/lrx_cli/fetchers/qqmusic.py
index 21dbe04..d160c82 100644
--- a/lrx_cli/fetchers/qqmusic.py
+++ b/lrx_cli/fetchers/qqmusic.py
@@ -11,18 +11,20 @@ The base URL is read from the QQ_MUSIC_API_URL environment variable.
 Search → pick best match by duration → fetch LRC lyrics.
 """
 
+import asyncio
 from typing import Optional
 import httpx
 from loguru import logger
 
 from .base import BaseFetcher
-from .selection import SearchCandidate, select_best
+from .selection import SearchCandidate, select_ranked
 from ..models import TrackMeta, LyricResult, CacheStatus
 from ..lrc import LRCData
 from ..config import (
     HTTP_TIMEOUT,
     TTL_NOT_FOUND,
     TTL_NETWORK_ERROR,
+    MULTI_CANDIDATE_DELAY_S,
     QQ_MUSIC_API_URL,
 )
 
@@ -37,10 +39,10 @@ class QQMusicFetcher(BaseFetcher):
 
     async def _search(
         self, track: TrackMeta, limit: int = 10
-    ) -> tuple[Optional[str], float]:
+    ) -> list[tuple[str, float]]:
         query = f"{track.artist or ''} {track.title or ''}".strip()
         if not query:
-            return None, 0.0
+            return []
 
         logger.debug(f"QQMusic: searching for '{query}' (limit={limit})")
 
@@ -55,18 +57,18 @@ class QQMusicFetcher(BaseFetcher):
 
             if data.get("code") != 0:
                 logger.error(f"QQMusic: search API error: {data}")
-                return None, 0.0
+                return []
 
             songs = data.get("data", {}).get("list", [])
             if not songs:
                 logger.debug("QQMusic: search returned 0 results")
-                return None, 0.0
+                return []
 
             logger.debug(f"QQMusic: search returned {len(songs)} candidates")
 
             candidates = [
                 SearchCandidate(
-                    item=song.get("mid"),
+                    item=mid,
                     duration_ms=float(song["interval"]) * 1000
                     if isinstance(song.get("interval"), int)
                     else None,
@@ -76,27 +78,27 @@ class QQMusicFetcher(BaseFetcher):
                     album=(song.get("album") or {}).get("name"),
                 )
                 for song in songs
-                if isinstance(song, dict) and song.get("mid") is not None
+                if isinstance(song, dict) and isinstance(mid := song.get("mid"), str)
             ]
-            best_mid, confidence = select_best(
+            ranked = select_ranked(
                 candidates,
                 track.length,
                 title=track.title,
                 artist=track.artist,
                 album=track.album,
             )
-            if best_mid is not None:
+            if ranked:
                 logger.debug(
-                    f"QQMusic: selected mid={best_mid} (confidence={confidence:.0f})"
+                    "QQMusic: top candidates: "
+                    + ", ".join(f"mid={m} ({c:.0f})" for m, c in ranked)
                 )
-                return best_mid, confidence
-
-            logger.debug("QQMusic: no suitable candidate found")
-            return None, 0.0
+            else:
+                logger.debug("QQMusic: no suitable candidate found")
+            return ranked
 
         except Exception as e:
             logger.error(f"QQMusic: search failed: {e}")
-            return None, 0.0
+            return []
 
     async def _get_lyric(
         self, mid: str, confidence: float = 0.0
@@ -152,9 +154,18 @@ class QQMusicFetcher(BaseFetcher):
             return None
 
         logger.info(f"QQMusic: fetching lyrics for {track.display_name()}")
-        mid, confidence = await self._search(track)
-        if not mid:
+        candidates = await self._search(track)
+        if not candidates:
             logger.debug(f"QQMusic: no match found for {track.display_name()}")
             return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
 
-        return await self._get_lyric(mid, confidence=confidence)
+        for i, (mid, confidence) in enumerate(candidates):
+            if i > 0:
+                await asyncio.sleep(MULTI_CANDIDATE_DELAY_S)
+            result = await self._get_lyric(mid, confidence=confidence)
+            if result is None or result.status == CacheStatus.NETWORK_ERROR:
+                return result
+            if result.status != CacheStatus.NOT_FOUND:
+                return result
+
+        return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
diff --git a/lrx_cli/fetchers/selection.py b/lrx_cli/fetchers/selection.py
index 7ed99a5..92c416c 100644
--- a/lrx_cli/fetchers/selection.py
+++ b/lrx_cli/fetchers/selection.py
@@ -11,6 +11,7 @@ from typing import Generic, Optional, TypeVar
 
 from ..config import (
     DURATION_TOLERANCE_MS,
+    MULTI_CANDIDATE_LIMIT,
     SCORE_W_TITLE as _W_TITLE,
     SCORE_W_ARTIST as _W_ARTIST,
     SCORE_W_ALBUM as _W_ALBUM,
@@ -143,6 +144,32 @@ def _score_candidate(
     return metadata_score + synced_score
 
 
+def select_ranked(
+    candidates: list[SearchCandidate[T]],
+    track_length_ms: Optional[int] = None,
+    *,
+    title: Optional[str] = None,
+    artist: Optional[str] = None,
+    album: Optional[str] = None,
+    min_confidence: float = MIN_CONFIDENCE,
+    max_results: int = MULTI_CANDIDATE_LIMIT,
+) -> list[tuple[T, float]]:
+    """Return up to max_results candidates scoring at least min_confidence (duration mismatches excluded), best first."""
+    scored: list[tuple[T, float]] = []
+    for c in candidates:
+        if (
+            track_length_ms is not None
+            and c.duration_ms is not None
+            and abs(c.duration_ms - track_length_ms) > DURATION_TOLERANCE_MS
+        ):
+            continue
+        s = _score_candidate(c, title, artist, album, track_length_ms)
+        if s >= min_confidence:
+            scored.append((c.item, s))
+    scored.sort(key=lambda x: x[1], reverse=True)
+    return scored[:max_results]
+
+
 def select_best(
     candidates: list[SearchCandidate[T]],
     track_length_ms: Optional[int] = None,
diff --git a/pyproject.toml b/pyproject.toml
index f781708..b0a2b93 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "lrx-cli"
-version = "0.4.1"
+version = "0.4.2"
 description = "Fetch line-synced lyrics for your music player."
 readme = "README.md"
 requires-python = ">=3.13"
diff --git a/tests/test_selection.py b/tests/test_selection.py
index 90511b8..a2ec56f 100644
--- a/tests/test_selection.py
+++ b/tests/test_selection.py
@@ -3,6 +3,7 @@ from __future__ import annotations
 from lrx_cli.fetchers.selection import (
     SearchCandidate,
     select_best,
+    select_ranked,
     _score_candidate,
     _text_similarity,
     MIN_CONFIDENCE,
@@ -407,9 +408,6 @@ def test_netease_without_ref_metadata_rejects_below_confidence() -> None:
     assert best is None
 
 
-# --- Edge cases ---
-
-
 def test_empty_candidates_returns_none() -> None:
     assert select_best([], track_length_ms=5000) == (None, 0.0)
     assert select_best([], track_length_ms=None) == (None, 0.0)
@@ -445,3 +443,85 @@ def test_generic_type_preserved() -> None:
     dict_candidates = [SearchCandidate(item={"id": 1}, title="x")]
     best, _ = select_best(dict_candidates, title="x")
     assert best == {"id": 1}
+
+
+def test_select_ranked_empty_input() -> None:
+    assert select_ranked([]) == []
+
+
+def test_select_ranked_all_below_confidence() -> None:
+    """All candidates below threshold → empty list."""
+    candidates = [
+        SearchCandidate(item="x", title="Completely Different", duration_ms=999999.0)
+    ]
+    result = select_ranked(
+        candidates, 232000, title="My Love", artist="Westlife", min_confidence=90.0
+    )
+    assert result == []
+
+
+def test_select_ranked_sorted_descending() -> None:
+    """Results are ordered highest score first."""
+    candidates = _netease_candidates()
+    ranked = select_ranked(
+        candidates,
+        _REF_LENGTH,
+        title=_REF_TITLE,
+        artist=_REF_ARTIST,
+        album=_REF_ALBUM,
+    )
+    assert len(ranked) >= 2
+    scores = [score for _, score in ranked]
+    assert scores == sorted(scores, reverse=True)
+
+
+def test_select_ranked_respects_max_results() -> None:
+    candidates = _netease_candidates()
+    ranked = select_ranked(
+        candidates,
+        _REF_LENGTH,
+        title=_REF_TITLE,
+        artist=_REF_ARTIST,
+        album=_REF_ALBUM,
+        max_results=2,
+    )
+    assert len(ranked) <= 2
+
+
+def test_select_ranked_consistent_with_select_best() -> None:
+    """First result of select_ranked matches select_best."""
+    candidates = _netease_candidates()
+    kwargs = dict(title=_REF_TITLE, artist=_REF_ARTIST, album=_REF_ALBUM)
+    ranked = select_ranked(candidates, _REF_LENGTH, **kwargs)  # type: ignore
+    best_item, best_score = select_best(candidates, _REF_LENGTH, **kwargs)  # type: ignore
+    assert ranked[0] == (best_item, best_score)
+
+
+def test_select_ranked_duration_hard_filter_applies() -> None:
+    """Candidates outside duration tolerance are excluded from ranked results."""
+    candidates = _netease_candidates()
+    ranked = select_ranked(
+        candidates,
+        _REF_LENGTH,
+        title=_REF_TITLE,
+        artist=_REF_ARTIST,
+        album=_REF_ALBUM,
+    )
+    ids = [item for item, _ in ranked]
+    # 29809886 (dt=262000, diff=30000ms) and 20707713 (dt=241116, diff=9116ms)
+    # both exceed DURATION_TOLERANCE_MS=3000 → must not appear
+    assert 29809886 not in ids
+    assert 20707713 not in ids
+
+
+def test_select_ranked_netease_top_is_best_duration_match() -> None:
+    """2080607 (diff=59ms) should rank first over 572412968 (diff=1000ms)."""
+    candidates = _netease_candidates()
+    ranked = select_ranked(
+        candidates,
+        _REF_LENGTH,
+        title=_REF_TITLE,
+        artist=_REF_ARTIST,
+        album=_REF_ALBUM,
+    )
+    assert ranked[0][0] == 2080607
diff --git a/uv.lock b/uv.lock
index 6f37c3f..1a6cd93 100644
--- a/uv.lock
+++ b/uv.lock
@@ -153,7 +153,7 @@ wheels = [
 
 [[package]]
 name = "lrx-cli"
-version = "0.4.0"
+version = "0.4.1"
 source = { editable = "." }
 dependencies = [
     { name = "cyclopts" },