feat: add hard duration filter to avoid false positive matches
This commit is contained in:
@@ -80,7 +80,8 @@ def _score_candidate(
|
||||
- Title: 40
|
||||
- Artist: 30
|
||||
- Album: 10
|
||||
- Duration: 10
|
||||
- Duration: 10 (only when reference track has duration; hard mismatch is
|
||||
pre-filtered before scoring)
|
||||
"""
|
||||
raw = 0.0
|
||||
available_weight = 0.0
|
||||
@@ -115,10 +116,15 @@ def _score_candidate(
|
||||
normalize_for_match(ref_album), normalize_for_match(c.album)
|
||||
)
|
||||
|
||||
# Duration
|
||||
if ref_length_ms is not None or c.duration_ms is not None:
|
||||
# Duration — only counted when the reference track has duration.
|
||||
# If the candidate also has duration, it contributes positively when matching
|
||||
# (hard mismatch is already filtered upstream in select_best).
|
||||
# If the candidate lacks duration, it contributes 0 to raw but still counts
|
||||
# in available_weight (penalty for missing verifiable info).
|
||||
# If the reference has no duration, duration is excluded entirely (neutral).
|
||||
if ref_length_ms is not None:
|
||||
available_weight += _W_DURATION
|
||||
if ref_length_ms is not None and c.duration_ms is not None:
|
||||
if c.duration_ms is not None:
|
||||
diff = abs(c.duration_ms - ref_length_ms)
|
||||
if diff <= DURATION_TOLERANCE_MS:
|
||||
raw += _W_DURATION * (1.0 - diff / DURATION_TOLERANCE_MS)
|
||||
@@ -157,6 +163,14 @@ def select_best(
|
||||
best_score = -1.0
|
||||
|
||||
for c in candidates:
|
||||
# Hard duration filter: both sides have duration but they don't match → skip.
|
||||
if (
|
||||
track_length_ms is not None
|
||||
and c.duration_ms is not None
|
||||
and abs(c.duration_ms - track_length_ms) > DURATION_TOLERANCE_MS
|
||||
):
|
||||
continue
|
||||
|
||||
s = _score_candidate(c, title, artist, album, track_length_ms)
|
||||
if s > best_score:
|
||||
best_score = s
|
||||
|
||||
+1
-1
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
||||
|
||||
[project]
|
||||
name = "lrx-cli"
|
||||
version = "0.3.2"
|
||||
version = "0.3.4"
|
||||
description = "Fetch line-synced lyrics for your music player."
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.13"
|
||||
|
||||
@@ -101,6 +101,31 @@ def test_score_duration_linear_decay() -> None:
|
||||
assert score_edge == 0.0
|
||||
|
||||
|
||||
def test_duration_hard_filter_rejects_all_mismatched() -> None:
    """No candidate is selected when every duration falls outside the tolerance."""
    ref_length_ms = 232000
    # Title and artist match the query exactly; only the durations disagree
    # with the reference length.
    mismatched = [
        SearchCandidate(
            item=name, duration_ms=length, title="My Love", artist="Westlife"
        )
        for name, length in (("wrong", 180000.0), ("also-wrong", 300000.0))
    ]

    winner, _ = select_best(
        mismatched, ref_length_ms, title="My Love", artist="Westlife"
    )

    assert winner is None
|
||||
|
||||
|
||||
def test_duration_neutral_when_ref_has_no_duration() -> None:
    """A candidate's own duration must not alter its score if the reference has none."""
    # Same title on both; only one of them carries a duration.
    bare = SearchCandidate(item="no-dur", title="My Love")
    timed = SearchCandidate(item="with-dur", title="My Love", duration_ms=232000.0)

    # The reference length is None in both calls.
    scores = [
        _score_candidate(candidate, "My Love", None, None, None)
        for candidate in (bare, timed)
    ]

    assert scores[0] == scores[1]
|
||||
|
||||
|
||||
def test_score_case_insensitive_title() -> None:
|
||||
c = SearchCandidate(item="x", title="my love")
|
||||
s1 = _score_candidate(c, "My Love", None, None, None)
|
||||
|
||||
Reference in New Issue
Block a user