from __future__ import annotations from lrx_cli.fetchers.selection import ( SearchCandidate, select_best, _score_candidate, _text_similarity, MIN_CONFIDENCE, ) def test_text_similarity_exact() -> None: assert _text_similarity("my love", "my love") == 1.0 def test_text_similarity_empty() -> None: assert _text_similarity("", "anything") == 0.0 assert _text_similarity("anything", "") == 0.0 def test_text_similarity_no_overlap() -> None: assert _text_similarity("hello", "world") == 0.0 def test_text_similarity_containment() -> None: # "my love" is contained in "my love album version" score = _text_similarity("my love", "my love album version") assert 0.0 < score < 1.0 assert score == len("my love") / len("my love album version") def test_score_perfect_match() -> None: """Exact metadata + close duration + synced = 100.""" c = SearchCandidate( item="x", duration_ms=232000.0, is_synced=True, title="My Love", artist="Westlife", album="Coast To Coast", ) score = _score_candidate(c, "My Love", "Westlife", "Coast To Coast", 232000) assert score == 100.0 def test_score_no_metadata_match() -> None: """Completely wrong metadata should score very low.""" c = SearchCandidate( item="x", duration_ms=192000.0, is_synced=True, title="Let My Love Be Your Pillow (Live)", artist="Ronnie Milsap", album="The Essential Ronnie Milsap", ) score = _score_candidate(c, "My Love", "Westlife", "Coast To Coast", 232000) assert score < MIN_CONFIDENCE def test_score_missing_both_sides_neutral() -> None: """If neither ref nor candidate has any field, only synced bonus applies.""" c = SearchCandidate(item="x", is_synced=True) score = _score_candidate(c, None, None, None, None) # No comparable fields → metadata = 0, synced = 10 assert score == 10.0 def test_score_missing_one_side_gives_zero_for_field() -> None: """If ref has title but candidate doesn't, title gets 0 and weight still counts.""" c = SearchCandidate(item="x", title=None, is_synced=True) # Only title is in play (weight=40), candidate missing → raw=0, rescaled=0, + synced=10 score = _score_candidate(c, "My Love", None, None, None) assert score == 10.0 def test_score_synced_bonus() -> None: """Synced adds 10 points.""" base = SearchCandidate(item="x", title="My Love", is_synced=False) synced = SearchCandidate(item="x", title="My Love", is_synced=True) diff = _score_candidate(synced, "My Love", None, None, None) - _score_candidate( base, "My Love", None, None, None ) assert diff == 10.0 def test_score_duration_linear_decay() -> None: """Duration score decays linearly; ratios between exact/half/edge are preserved.""" exact = SearchCandidate(item="x", duration_ms=232000.0) score_exact = _score_candidate(exact, None, None, None, 232000) half_tol = SearchCandidate(item="x", duration_ms=232000.0 + 1500.0) score_half = _score_candidate(half_tol, None, None, None, 232000) at_tol = SearchCandidate(item="x", duration_ms=232000.0 + 3000.0) score_edge = _score_candidate(at_tol, None, None, None, 232000) # Only duration is comparable → rescaled to fill 0-90 # exact=90, half=45, edge=0 assert score_exact == 90.0 assert score_half == 45.0 assert score_edge == 0.0 def test_duration_hard_filter_rejects_all_mismatched() -> None: """All candidates outside duration tolerance are filtered before scoring.""" candidates = [ SearchCandidate( item="wrong", duration_ms=180000.0, title="My Love", artist="Westlife" ), SearchCandidate( item="also-wrong", duration_ms=300000.0, title="My Love", artist="Westlife" ), ] best, _ = select_best(candidates, 232000, title="My Love", artist="Westlife") assert best is None def test_duration_neutral_when_ref_has_no_duration() -> None: """Candidate duration does not penalise when the reference has no duration.""" # Candidate A: title only (no duration) c_no_dur = SearchCandidate(item="no-dur", title="My Love") # Candidate B: same title + a duration (ref has none) c_with_dur = SearchCandidate(item="with-dur", title="My Love", duration_ms=232000.0) score_no_dur = _score_candidate(c_no_dur, "My Love", None, None, None) score_with_dur = _score_candidate(c_with_dur, "My Love", None, None, None) assert score_no_dur == score_with_dur def test_score_case_insensitive_title() -> None: c = SearchCandidate(item="x", title="my love") s1 = _score_candidate(c, "My Love", None, None, None) s2 = _score_candidate(c, "my love", None, None, None) assert s1 == s2 def test_score_artist_normalization() -> None: """'Westlife feat. Someone' should still match 'Westlife'.""" c = SearchCandidate(item="x", artist="Westlife feat. Someone") # normalize_artist strips feat. → both become "westlife" score = _score_candidate(c, None, "Westlife", None, None) assert score >= 30.0 # full artist weight (30) when both None on other fields # Reference track: Westlife - My Love, album Coast To Coast, ~232s _REF_TITLE = "My Love" _REF_ARTIST = "Westlife" _REF_ALBUM = "Coast To Coast" _REF_LENGTH = 232000 # ms def _lrclib_candidates() -> list[SearchCandidate[dict]]: """Fixtures from real LRCLIB search results.""" raw = [ { "trackName": "My Love", "artistName": "Westlife", "albumName": "null", "duration": 232.0, "synced": True, }, { "trackName": "My Love", "artistName": "Westlife", "albumName": "null", "duration": 180.0, "synced": True, }, { "trackName": "My love", "artistName": "Westlife", "albumName": "moments", "duration": 235.327, "synced": True, }, { "trackName": "My Love", "artistName": "Westlife", "albumName": "Unbreakable", "duration": 233.026, "synced": True, }, { "trackName": "My Love", "artistName": "Westlife", "albumName": "Coast To Coast", "duration": 231.847, "synced": True, }, { "trackName": "Hello My Love", "artistName": "Westlife", "albumName": "Spectrum", "duration": 216.0, "synced": True, }, { "trackName": "My Love", "artistName": "Westlife", "albumName": "Hitzone 13", "duration": 231.0, "synced": True, }, ] return [ SearchCandidate( item=r, duration_ms=r["duration"] * 1000, is_synced=r["synced"], title=r["trackName"], artist=r["artistName"], album=r["albumName"], ) for r in raw ] def _lrclib_noisy_candidates() -> list[SearchCandidate[dict]]: """Fixtures from LRCLIB title-only search — lots of wrong artists.""" raw = [ { "trackName": "Let My Love Be Your Pillow (Live)", "artistName": "Ronnie Milsap", "albumName": "The Essential Ronnie Milsap", "duration": 192.0, "synced": True, }, { "trackName": "My Love", "artistName": "Little Texas", "albumName": "Big Time", "duration": 248.0, "synced": True, }, { "trackName": "My Love (Album Version)", "artistName": "Little Texas", "albumName": "Greatest Hits", "duration": 248.0, "synced": True, }, { "trackName": "My Love - Digitally Remastered '89", "artistName": "Sonny James", "albumName": "Capitol Collectors Series", "duration": 169.0, "synced": False, }, { "trackName": "My Love", "artistName": "Westlife", "albumName": "Coast To Coast", "duration": 231.847, "synced": True, }, ] return [ SearchCandidate( item=r, duration_ms=r["duration"] * 1000, is_synced=r["synced"], title=r["trackName"], artist=r["artistName"], album=r["albumName"], ) for r in raw ] def _netease_candidates() -> list[SearchCandidate[int]]: """Fixtures from real Netease search results.""" raw = [ { "id": 2080607, "name": "My Love", "artist": "Westlife", "album": "Unbreakable, Vol. 1 - The Greatest Hits", "dt": 231941, }, { "id": 2080749, "name": "My Love (Radio Edit)", "artist": "Westlife", "album": "World Of Our Own - No. 1 Hits Plus (EP)", "dt": 232920, }, { "id": 29809886, "name": "My Love (Live)", "artist": "Westlife", "album": "The Farewell Tour: Live at Croke Park", "dt": 262000, }, { "id": 572412968, "name": "My Love", "artist": "Westlife", "album": "Pure... Love", "dt": 231000, }, { "id": 20707713, "name": "You Raise Me Up", "artist": "Westlife", "album": "You Raise Me Up", "dt": 241116, }, ] return [ SearchCandidate( item=r["id"], duration_ms=float(r["dt"]), title=r["name"], artist=r["artist"], album=r["album"], ) for r in raw ] def test_lrclib_picks_exact_album_match() -> None: """With full metadata, should pick the Coast To Coast entry.""" candidates = _lrclib_candidates() best, score = select_best( candidates, _REF_LENGTH, title=_REF_TITLE, artist=_REF_ARTIST, album=_REF_ALBUM, ) assert best is not None assert best["albumName"] == "Coast To Coast" assert score >= MIN_CONFIDENCE def test_lrclib_rejects_wrong_title() -> None: """'Hello My Love' should not beat 'My Love' entries.""" candidates = _lrclib_candidates() best, _ = select_best( candidates, _REF_LENGTH, title=_REF_TITLE, artist=_REF_ARTIST, album=_REF_ALBUM, ) assert best is not None assert best["trackName"] != "Hello My Love" def test_lrclib_noisy_picks_westlife() -> None: """In noisy title-only results, artist matching should filter to Westlife.""" candidates = _lrclib_noisy_candidates() best, _ = select_best( candidates, _REF_LENGTH, title=_REF_TITLE, artist=_REF_ARTIST, album=_REF_ALBUM, ) assert best is not None assert best["artistName"] == "Westlife" def test_lrclib_noisy_rejects_all_without_ref_artist() -> None: """Without ref artist, wrong-artist candidates may still win, but right title should rank higher.""" candidates = _lrclib_noisy_candidates() best, _ = select_best( candidates, _REF_LENGTH, title=_REF_TITLE, ) # Should pick a "My Love" over "Let My Love Be Your Pillow" assert best is not None assert "My Love" == best["trackName"] or best["trackName"].startswith("My Love") def test_netease_picks_closest_duration() -> None: candidates = _netease_candidates() best, _ = select_best( candidates, _REF_LENGTH, title=_REF_TITLE, artist=_REF_ARTIST, album=_REF_ALBUM, ) # 2080607 has dt=231941 (diff=59ms), closest to 232000 assert best == 2080607 def test_netease_rejects_wrong_title() -> None: """'You Raise Me Up' should not be selected.""" candidates = _netease_candidates() best, _ = select_best( candidates, _REF_LENGTH, title=_REF_TITLE, artist=_REF_ARTIST, ) assert best != 20707713 def test_netease_without_ref_metadata_rejects_below_confidence() -> None: """Without any ref metadata, candidates with one-sided fields score low and get rejected.""" candidates = _netease_candidates() best, _ = select_best(candidates, _REF_LENGTH) # Candidates have title/artist/album but ref has None for all → 0 for text fields # Only duration (max 10) contributes → below MIN_CONFIDENCE (25) assert best is None # --- Edge cases --- def test_empty_candidates_returns_none() -> None: assert select_best([], track_length_ms=5000) == (None, 0.0) assert select_best([], track_length_ms=None) == (None, 0.0) def test_all_below_min_confidence_returns_none() -> None: """If all candidates score below threshold, return None.""" candidates = [ SearchCandidate( item="x", title="Completely Different Song", artist="Unknown Artist", album="Unknown Album", duration_ms=999999.0, ), ] result, _ = select_best( candidates, 232000, title="My Love", artist="Westlife", album="Coast To Coast", min_confidence=90.0, ) assert result is None def test_generic_type_preserved() -> None: int_candidates = [SearchCandidate(item=42, duration_ms=5000.0, title="x")] best, _ = select_best(int_candidates, 5000, title="x") assert best == 42 dict_candidates = [SearchCandidate(item={"id": 1}, title="x")] best, _ = select_best(dict_candidates, title="x") assert best == {"id": 1}