feat: add hard duration filter to avoid false positive matches
This commit is contained in:
@@ -80,7 +80,8 @@ def _score_candidate(
|
|||||||
- Title: 40
|
- Title: 40
|
||||||
- Artist: 30
|
- Artist: 30
|
||||||
- Album: 10
|
- Album: 10
|
||||||
- Duration: 10
|
- Duration: 10 (only when reference track has duration; hard mismatch is
|
||||||
|
pre-filtered before scoring)
|
||||||
"""
|
"""
|
||||||
raw = 0.0
|
raw = 0.0
|
||||||
available_weight = 0.0
|
available_weight = 0.0
|
||||||
@@ -115,10 +116,15 @@ def _score_candidate(
|
|||||||
normalize_for_match(ref_album), normalize_for_match(c.album)
|
normalize_for_match(ref_album), normalize_for_match(c.album)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Duration
|
# Duration — only counted when the reference track has duration.
|
||||||
if ref_length_ms is not None or c.duration_ms is not None:
|
# If the candidate also has duration, it contributes positively when matching
|
||||||
|
# (hard mismatch is already filtered upstream in select_best).
|
||||||
|
# If the candidate lacks duration, it contributes 0 to raw but still counts
|
||||||
|
# in available_weight (penalty for missing verifiable info).
|
||||||
|
# If the reference has no duration, duration is excluded entirely (neutral).
|
||||||
|
if ref_length_ms is not None:
|
||||||
available_weight += _W_DURATION
|
available_weight += _W_DURATION
|
||||||
if ref_length_ms is not None and c.duration_ms is not None:
|
if c.duration_ms is not None:
|
||||||
diff = abs(c.duration_ms - ref_length_ms)
|
diff = abs(c.duration_ms - ref_length_ms)
|
||||||
if diff <= DURATION_TOLERANCE_MS:
|
if diff <= DURATION_TOLERANCE_MS:
|
||||||
raw += _W_DURATION * (1.0 - diff / DURATION_TOLERANCE_MS)
|
raw += _W_DURATION * (1.0 - diff / DURATION_TOLERANCE_MS)
|
||||||
@@ -157,6 +163,14 @@ def select_best(
|
|||||||
best_score = -1.0
|
best_score = -1.0
|
||||||
|
|
||||||
for c in candidates:
|
for c in candidates:
|
||||||
|
# Hard duration filter: both sides have duration but they don't match → skip.
|
||||||
|
if (
|
||||||
|
track_length_ms is not None
|
||||||
|
and c.duration_ms is not None
|
||||||
|
and abs(c.duration_ms - track_length_ms) > DURATION_TOLERANCE_MS
|
||||||
|
):
|
||||||
|
continue
|
||||||
|
|
||||||
s = _score_candidate(c, title, artist, album, track_length_ms)
|
s = _score_candidate(c, title, artist, album, track_length_ms)
|
||||||
if s > best_score:
|
if s > best_score:
|
||||||
best_score = s
|
best_score = s
|
||||||
|
|||||||
+1
-1
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "lrx-cli"
|
name = "lrx-cli"
|
||||||
version = "0.3.2"
|
version = "0.3.4"
|
||||||
description = "Fetch line-synced lyrics for your music player."
|
description = "Fetch line-synced lyrics for your music player."
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.13"
|
requires-python = ">=3.13"
|
||||||
|
|||||||
@@ -101,6 +101,31 @@ def test_score_duration_linear_decay() -> None:
|
|||||||
assert score_edge == 0.0
|
assert score_edge == 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def test_duration_hard_filter_rejects_all_mismatched() -> None:
|
||||||
|
"""All candidates outside duration tolerance are filtered before scoring."""
|
||||||
|
candidates = [
|
||||||
|
SearchCandidate(
|
||||||
|
item="wrong", duration_ms=180000.0, title="My Love", artist="Westlife"
|
||||||
|
),
|
||||||
|
SearchCandidate(
|
||||||
|
item="also-wrong", duration_ms=300000.0, title="My Love", artist="Westlife"
|
||||||
|
),
|
||||||
|
]
|
||||||
|
best, _ = select_best(candidates, 232000, title="My Love", artist="Westlife")
|
||||||
|
assert best is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_duration_neutral_when_ref_has_no_duration() -> None:
|
||||||
|
"""Candidate duration does not penalise when the reference has no duration."""
|
||||||
|
# Candidate A: title only (no duration)
|
||||||
|
c_no_dur = SearchCandidate(item="no-dur", title="My Love")
|
||||||
|
# Candidate B: same title + a duration (ref has none)
|
||||||
|
c_with_dur = SearchCandidate(item="with-dur", title="My Love", duration_ms=232000.0)
|
||||||
|
score_no_dur = _score_candidate(c_no_dur, "My Love", None, None, None)
|
||||||
|
score_with_dur = _score_candidate(c_with_dur, "My Love", None, None, None)
|
||||||
|
assert score_no_dur == score_with_dur
|
||||||
|
|
||||||
|
|
||||||
def test_score_case_insensitive_title() -> None:
|
def test_score_case_insensitive_title() -> None:
|
||||||
c = SearchCandidate(item="x", title="my love")
|
c = SearchCandidate(item="x", title="my love")
|
||||||
s1 = _score_candidate(c, "My Love", None, None, None)
|
s1 = _score_candidate(c, "My Love", None, None, None)
|
||||||
|
|||||||
Reference in New Issue
Block a user