feat: evaluate fetch results with "confidence"

This commit is contained in:
2026-04-02 04:26:19 +02:00
parent 9aaf4d8aed
commit 2df167e31d
15 changed files with 836 additions and 238 deletions
+132 -32
View File
@@ -2,13 +2,23 @@
Shared candidate-selection logic for search-based fetchers.
Each fetcher maps its API-specific results to SearchCandidate, then calls
select_best() which handles duration filtering and synced preference uniformly.
select_best() which scores candidates by metadata similarity, duration
proximity, and sync status.
"""
from dataclasses import dataclass
from typing import Generic, Optional, TypeVar
from ..config import DURATION_TOLERANCE_MS
from ..config import (
DURATION_TOLERANCE_MS,
SCORE_W_TITLE as _W_TITLE,
SCORE_W_ARTIST as _W_ARTIST,
SCORE_W_ALBUM as _W_ALBUM,
SCORE_W_DURATION as _W_DURATION,
SCORE_W_SYNCED as _W_SYNCED,
MIN_CONFIDENCE,
)
from ..normalize import normalize_for_match, normalize_artist
T = TypeVar("T")
@@ -21,48 +31,138 @@ class SearchCandidate(Generic[T]):
item: The original API-specific object (dict, ID, etc.)
duration_ms: Track duration in milliseconds, or None if unknown.
is_synced: Whether this candidate is known to have synced lyrics.
title: Candidate track title for similarity scoring.
artist: Candidate artist name for similarity scoring.
album: Candidate album name for similarity scoring.
"""
item: T
duration_ms: Optional[float] = None
is_synced: bool = False
title: Optional[str] = None
artist: Optional[str] = None
album: Optional[str] = None
def _text_similarity(a: str, b: str) -> float:
"""Compare two normalized strings. Returns 0.0-1.0."""
if a == b:
return 1.0
if not a or not b:
return 0.0
# Containment: one is a substring of the other (e.g. "My Love" vs "My Love (Album Version)")
if a in b or b in a:
return min(len(a), len(b)) / max(len(a), len(b))
return 0.0
def _score_candidate(
c: SearchCandidate[T],
ref_title: Optional[str],
ref_artist: Optional[str],
ref_album: Optional[str],
ref_length_ms: Optional[int],
) -> float:
"""Score a candidate from 0-100 based on metadata match quality.
Scoring works in two tiers:
1. **Metadata score** — computed from fields available on *both* sides,
then rescaled to fill the 0-90 range so that missing fields don't
inflate the score. Fields missing on both sides are simply excluded
from the calculation (neutral). Fields present on only one side
contribute 0 to the numerator but their weight still counts in the
denominator (penalty for asymmetric absence).
2. **Synced bonus** — a flat 10 pts, always applied independently.
Field weights (before rescaling):
- Title: 40
- Artist: 30
- Album: 10
- Duration: 10
"""
raw = 0.0
available_weight = 0.0
# Title
if ref_title is not None or c.title is not None:
available_weight += _W_TITLE
if ref_title is not None and c.title is not None:
raw += _W_TITLE * _text_similarity(
normalize_for_match(ref_title), normalize_for_match(c.title)
)
# else both None → excluded
# Artist
if ref_artist is not None or c.artist is not None:
available_weight += _W_ARTIST
if ref_artist is not None and c.artist is not None:
na = normalize_artist(ref_artist)
nb = normalize_artist(c.artist)
if na == nb:
raw += _W_ARTIST
else:
raw += _W_ARTIST * _text_similarity(
normalize_for_match(ref_artist), normalize_for_match(c.artist)
)
# Album
if ref_album is not None or c.album is not None:
available_weight += _W_ALBUM
if ref_album is not None and c.album is not None:
raw += _W_ALBUM * _text_similarity(
normalize_for_match(ref_album), normalize_for_match(c.album)
)
# Duration
if ref_length_ms is not None or c.duration_ms is not None:
available_weight += _W_DURATION
if ref_length_ms is not None and c.duration_ms is not None:
diff = abs(c.duration_ms - ref_length_ms)
if diff <= DURATION_TOLERANCE_MS:
raw += _W_DURATION * (1.0 - diff / DURATION_TOLERANCE_MS)
# Rescale metadata to 0-90 range
_MAX_METADATA = _W_TITLE + _W_ARTIST + _W_ALBUM + _W_DURATION # 90
if available_weight > 0:
metadata_score = (raw / available_weight) * _MAX_METADATA
else:
# No comparable fields at all — only synced bonus matters
metadata_score = 0.0
# Synced bonus (always 10 pts, independent of metadata)
synced_score = _W_SYNCED if c.is_synced else 0.0
return metadata_score + synced_score
def select_best(
candidates: list[SearchCandidate[T]],
track_length_ms: Optional[int] = None,
tolerance_ms: float = DURATION_TOLERANCE_MS,
) -> Optional[T]:
"""Pick the best candidate by duration proximity and sync preference.
*,
title: Optional[str] = None,
artist: Optional[str] = None,
album: Optional[str] = None,
min_confidence: float = MIN_CONFIDENCE,
) -> tuple[Optional[T], float]:
"""Pick the best candidate by confidence scoring.
When track_length_ms is available:
- Filter by tolerance_ms
- Pick closest duration, prefer synced at equal distance
When track_length_ms is unavailable:
- Pick first synced candidate, or first overall
Returns (item, score). Item is None if no candidate scores above min_confidence.
"""
if track_length_ms is not None:
best: Optional[SearchCandidate[T]] = None
best_diff = float("inf")
if not candidates:
return None, 0.0
for c in candidates:
if c.duration_ms is None:
continue
diff = abs(c.duration_ms - track_length_ms)
if diff > tolerance_ms:
continue
if diff < best_diff or (
diff == best_diff
and c.is_synced
and (best is None or not best.is_synced)
):
best_diff = diff
best = c
best_item: Optional[T] = None
best_score = -1.0
return best.item if best is not None else None
# No duration — prefer synced, fallback to first
for c in candidates:
if c.is_synced:
return c.item
return candidates[0].item if candidates else None
s = _score_candidate(c, title, artist, album, track_length_ms)
if s > best_score:
best_score = s
best_item = c.item
if best_score < min_confidence:
return None, best_score
return best_item, best_score