From 9a200aa58d14986b2cc7d11ae4be7dc4f397d543 Mon Sep 17 00:00:00 2001 From: Uyanide Date: Fri, 3 Apr 2026 16:12:02 +0200 Subject: [PATCH] feat: add hard duration filter to avoid false positive matches --- lrx_cli/fetchers/selection.py | 22 ++++++++++++++++++---- pyproject.toml | 2 +- tests/test_selection.py | 25 +++++++++++++++++++++++++ uv.lock | 2 +- 4 files changed, 45 insertions(+), 6 deletions(-) diff --git a/lrx_cli/fetchers/selection.py b/lrx_cli/fetchers/selection.py index cb2f76d..7ed99a5 100644 --- a/lrx_cli/fetchers/selection.py +++ b/lrx_cli/fetchers/selection.py @@ -80,7 +80,8 @@ def _score_candidate( - Title: 40 - Artist: 30 - Album: 10 - - Duration: 10 + - Duration: 10 (only when reference track has duration; hard mismatch is + pre-filtered before scoring) """ raw = 0.0 available_weight = 0.0 @@ -115,10 +116,15 @@ def _score_candidate( normalize_for_match(ref_album), normalize_for_match(c.album) ) - # Duration - if ref_length_ms is not None or c.duration_ms is not None: + # Duration — only counted when the reference track has duration. + # If the candidate also has duration, it contributes positively when matching + # (hard mismatch is already filtered upstream in select_best). + # If the candidate lacks duration, it contributes 0 to raw but still counts + # in available_weight (penalty for missing verifiable info). + # If the reference has no duration, duration is excluded entirely (neutral). + if ref_length_ms is not None: available_weight += _W_DURATION - if ref_length_ms is not None and c.duration_ms is not None: + if c.duration_ms is not None: diff = abs(c.duration_ms - ref_length_ms) if diff <= DURATION_TOLERANCE_MS: raw += _W_DURATION * (1.0 - diff / DURATION_TOLERANCE_MS) @@ -157,6 +163,14 @@ def select_best( best_score = -1.0 for c in candidates: + # Hard duration filter: both sides have duration but they don't match → skip. 
+ if ( + track_length_ms is not None + and c.duration_ms is not None + and abs(c.duration_ms - track_length_ms) > DURATION_TOLERANCE_MS + ): + continue + s = _score_candidate(c, title, artist, album, track_length_ms) if s > best_score: best_score = s diff --git a/pyproject.toml b/pyproject.toml index 5a8b665..8e1de1e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "lrx-cli" -version = "0.3.2" +version = "0.3.4" description = "Fetch line-synced lyrics for your music player." readme = "README.md" requires-python = ">=3.13" diff --git a/tests/test_selection.py b/tests/test_selection.py index 632a14b..90511b8 100644 --- a/tests/test_selection.py +++ b/tests/test_selection.py @@ -101,6 +101,31 @@ def test_score_duration_linear_decay() -> None: assert score_edge == 0.0 +def test_duration_hard_filter_rejects_all_mismatched() -> None: + """All candidates outside duration tolerance are filtered before scoring.""" + candidates = [ + SearchCandidate( + item="wrong", duration_ms=180000.0, title="My Love", artist="Westlife" + ), + SearchCandidate( + item="also-wrong", duration_ms=300000.0, title="My Love", artist="Westlife" + ), + ] + best, _ = select_best(candidates, 232000, title="My Love", artist="Westlife") + assert best is None + + +def test_duration_neutral_when_ref_has_no_duration() -> None: + """Candidate duration does not penalise when the reference has no duration.""" + # Candidate A: title only (no duration) + c_no_dur = SearchCandidate(item="no-dur", title="My Love") + # Candidate B: same title + a duration (ref has none) + c_with_dur = SearchCandidate(item="with-dur", title="My Love", duration_ms=232000.0) + score_no_dur = _score_candidate(c_no_dur, "My Love", None, None, None) + score_with_dur = _score_candidate(c_with_dur, "My Love", None, None, None) + assert score_no_dur == score_with_dur + + def test_score_case_insensitive_title() -> None: c = SearchCandidate(item="x", title="my love") s1 = 
_score_candidate(c, "My Love", None, None, None) diff --git a/uv.lock b/uv.lock index a93895e..9fc4d6d 100644 --- a/uv.lock +++ b/uv.lock @@ -153,7 +153,7 @@ wheels = [ [[package]] name = "lrx-cli" -version = "0.3.2" +version = "0.3.3" source = { editable = "." } dependencies = [ { name = "cyclopts" },