feat: add hard duration filter to avoid false positive matches

This commit is contained in:
2026-04-03 16:12:02 +02:00
parent c4a2944cec
commit 553e732152
4 changed files with 45 additions and 6 deletions
+18 -4
View File
@@ -80,7 +80,8 @@ def _score_candidate(
- Title: 40 - Title: 40
- Artist: 30 - Artist: 30
- Album: 10 - Album: 10
- Duration: 10 - Duration: 10 (only when reference track has duration; hard mismatch is
pre-filtered before scoring)
""" """
raw = 0.0 raw = 0.0
available_weight = 0.0 available_weight = 0.0
@@ -115,10 +116,15 @@ def _score_candidate(
normalize_for_match(ref_album), normalize_for_match(c.album) normalize_for_match(ref_album), normalize_for_match(c.album)
) )
# Duration # Duration — only counted when the reference track has duration.
if ref_length_ms is not None or c.duration_ms is not None: # If the candidate also has duration, it contributes positively when matching
# (hard mismatch is already filtered upstream in select_best).
# If the candidate lacks duration, it contributes 0 to raw but still counts
# in available_weight (penalty for missing verifiable info).
# If the reference has no duration, duration is excluded entirely (neutral).
if ref_length_ms is not None:
available_weight += _W_DURATION available_weight += _W_DURATION
if ref_length_ms is not None and c.duration_ms is not None: if c.duration_ms is not None:
diff = abs(c.duration_ms - ref_length_ms) diff = abs(c.duration_ms - ref_length_ms)
if diff <= DURATION_TOLERANCE_MS: if diff <= DURATION_TOLERANCE_MS:
raw += _W_DURATION * (1.0 - diff / DURATION_TOLERANCE_MS) raw += _W_DURATION * (1.0 - diff / DURATION_TOLERANCE_MS)
@@ -157,6 +163,14 @@ def select_best(
best_score = -1.0 best_score = -1.0
for c in candidates: for c in candidates:
# Hard duration filter: both sides have duration but they don't match → skip.
if (
track_length_ms is not None
and c.duration_ms is not None
and abs(c.duration_ms - track_length_ms) > DURATION_TOLERANCE_MS
):
continue
s = _score_candidate(c, title, artist, album, track_length_ms) s = _score_candidate(c, title, artist, album, track_length_ms)
if s > best_score: if s > best_score:
best_score = s best_score = s
+1 -1
View File
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
[project] [project]
name = "lrx-cli" name = "lrx-cli"
version = "0.3.2" version = "0.3.4"
description = "Fetch line-synced lyrics for your music player." description = "Fetch line-synced lyrics for your music player."
readme = "README.md" readme = "README.md"
requires-python = ">=3.13" requires-python = ">=3.13"
+25
View File
@@ -101,6 +101,31 @@ def test_score_duration_linear_decay() -> None:
assert score_edge == 0.0 assert score_edge == 0.0
def test_duration_hard_filter_rejects_all_mismatched() -> None:
    """select_best yields no winner when every candidate fails the duration filter."""
    # Both candidates match on title and artist; only their durations differ
    # from the 232000 ms reference (by 52000 ms and 68000 ms respectively).
    # NOTE(review): assumes both gaps exceed DURATION_TOLERANCE_MS, which is
    # defined outside this test — confirm against the matcher module.
    mismatched = [
        SearchCandidate(
            item=label, duration_ms=length_ms, title="My Love", artist="Westlife"
        )
        for label, length_ms in (("wrong", 180000.0), ("also-wrong", 300000.0))
    ]
    best, _ = select_best(mismatched, 232000, title="My Love", artist="Westlife")
    assert best is None
def test_duration_neutral_when_ref_has_no_duration() -> None:
    """A candidate's own duration must not change its score if the reference has none."""
    # Two candidates with the same title; only the second carries a duration.
    bare = SearchCandidate(item="no-dur", title="My Love")
    timed = SearchCandidate(item="with-dur", title="My Love", duration_ms=232000.0)

    # The last positional argument (reference duration) is None for both calls,
    # so the scores are expected to be exactly equal.
    bare_score = _score_candidate(bare, "My Love", None, None, None)
    timed_score = _score_candidate(timed, "My Love", None, None, None)
    assert bare_score == timed_score
def test_score_case_insensitive_title() -> None: def test_score_case_insensitive_title() -> None:
c = SearchCandidate(item="x", title="my love") c = SearchCandidate(item="x", title="my love")
s1 = _score_candidate(c, "My Love", None, None, None) s1 = _score_candidate(c, "My Love", None, None, None)
Generated
+1 -1
View File
@@ -153,7 +153,7 @@ wheels = [
[[package]] [[package]]
name = "lrx-cli" name = "lrx-cli"
version = "0.3.2" version = "0.3.3"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
{ name = "cyclopts" }, { name = "cyclopts" },