refactor: better (really?🤨) lrc parsing and handling
This commit is contained in:
@@ -80,7 +80,7 @@ def test_cache_search_fetcher_with_fuzzy_metadata(
|
||||
|
||||
assert result is not None
|
||||
assert result.lyrics is not None
|
||||
assert result.lyrics.to_lrc() == expected_lrc
|
||||
assert result.lyrics.to_text() == expected_lrc
|
||||
|
||||
|
||||
def test_cache_search_fetcher_prefer_better_match(lrc_manager: LrcManager):
|
||||
@@ -97,7 +97,7 @@ def test_cache_search_fetcher_prefer_better_match(lrc_manager: LrcManager):
|
||||
|
||||
assert result is not None
|
||||
assert result.lyrics is not None
|
||||
assert result.lyrics.to_lrc() == "[00:00.01]artist modified"
|
||||
assert result.lyrics.to_text() == "[00:00.01]artist modified"
|
||||
|
||||
|
||||
@pytest.mark.network
|
||||
|
||||
+139
-32
@@ -1,6 +1,11 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from lrx_cli.lrc import LRCData
|
||||
from lrx_cli.lrc import (
|
||||
LRCData,
|
||||
DocTagLine,
|
||||
LyricLine,
|
||||
WordSyncLyricLine,
|
||||
)
|
||||
from lrx_cli.models import CacheStatus
|
||||
|
||||
|
||||
@@ -8,7 +13,7 @@ def _normalize(text: str) -> str:
|
||||
return str(LRCData(text))
|
||||
|
||||
|
||||
def test_normalize_tags_supports_all_raw_time_formats() -> None:
|
||||
def test_time_tag_formats_are_normalized() -> None:
|
||||
raw = "\n".join(
|
||||
[
|
||||
"[00:01]a",
|
||||
@@ -32,37 +37,27 @@ def test_normalize_tags_supports_all_raw_time_formats() -> None:
|
||||
)
|
||||
|
||||
|
||||
def test_normalize_tags_keeps_non_timed_lines_trimmed_and_unchanged() -> None:
|
||||
raw = " plain line \n\n [ar:Meta Header] "
|
||||
def test_non_timed_lines_are_kept_as_lyrics() -> None:
|
||||
raw = " plain line \n\n other line "
|
||||
|
||||
normalized = _normalize(raw)
|
||||
|
||||
assert normalized == "plain line\n\n[ar:Meta Header]"
|
||||
assert normalized == "plain line\n\nother line"
|
||||
|
||||
|
||||
def test_normalize_tags_removes_word_sync_patterns() -> None:
|
||||
raw = (
|
||||
"[00:01.00]<00:01>hello\n"
|
||||
"[00:02.00]<00:02.3>world\n"
|
||||
"[00:03.00]<00:03.45>foo\n"
|
||||
"[00:04.00]<00:04:678>bar\n"
|
||||
"[00:05.00]<1,2,3>baz"
|
||||
)
|
||||
def test_word_sync_tags_are_parsed_and_export_controlled() -> None:
|
||||
raw = "[00:01.00]<00:01>he <00:01.50>llo\n[00:02.00]plain"
|
||||
|
||||
normalized = _normalize(raw)
|
||||
data = LRCData(raw)
|
||||
|
||||
assert normalized == "\n".join(
|
||||
[
|
||||
"[00:01.00]hello",
|
||||
"[00:02.00]world",
|
||||
"[00:03.00]foo",
|
||||
"[00:04.00]bar",
|
||||
"[00:05.00]baz",
|
||||
]
|
||||
assert data.to_text(include_word_sync=False) == "[00:01.00]he llo\n[00:02.00]plain"
|
||||
assert (
|
||||
data.to_text(include_word_sync=True)
|
||||
== "[00:01.00]<00:01.00>he <00:01.50>llo\n[00:02.00]plain"
|
||||
)
|
||||
|
||||
|
||||
def test_normalize_tags_keeps_midline_timestamps_as_is() -> None:
|
||||
def test_midline_line_tags_are_kept_as_plain_text() -> None:
|
||||
raw = "[00:01.00]Lyric [00:02.00]line"
|
||||
|
||||
normalized = _normalize(raw)
|
||||
@@ -74,11 +69,11 @@ def test_normalize_tags_applies_positive_and_negative_offset_per_spec() -> None:
|
||||
positive = _normalize("[offset:+1000]\n[00:10.00]line")
|
||||
negative = _normalize("[offset:-500]\n[00:10.00]line")
|
||||
|
||||
assert positive == "[00:09.00]line"
|
||||
assert negative == "[00:10.50]line"
|
||||
assert positive == "[offset:+1000]\n[00:10.00]line"
|
||||
assert negative == "[offset:-500]\n[00:10.00]line"
|
||||
|
||||
|
||||
def test_normalize_tags_accepts_leading_spaces_and_tabs_before_tags() -> None:
|
||||
def test_leading_spaces_before_first_time_tag_are_trimmed() -> None:
|
||||
raw = "\t [00:01.2] hello"
|
||||
|
||||
normalized = _normalize(raw)
|
||||
@@ -89,12 +84,14 @@ def test_normalize_tags_accepts_leading_spaces_and_tabs_before_tags() -> None:
|
||||
def test_normalize_tags_handles_consecutive_start_tags_with_spaces_between() -> None:
|
||||
raw = "[00:01] [00:02.3] chorus"
|
||||
|
||||
normalized = _normalize(raw)
|
||||
|
||||
assert normalized == "[00:01.00][00:02.30]chorus"
|
||||
data = LRCData(raw)
|
||||
assert len(data.lines) == 1
|
||||
assert isinstance(data.lines[0], LyricLine)
|
||||
assert data.lines[0].line_times_ms == [1000, 2300]
|
||||
assert data.lines[0].text == "chorus"
|
||||
|
||||
|
||||
def test_normalize_tags_preserves_non_leading_raw_like_tags() -> None:
|
||||
def test_non_leading_time_like_text_is_plain_lyric() -> None:
|
||||
raw = "intro [00:01]line"
|
||||
|
||||
normalized = _normalize(raw)
|
||||
@@ -107,7 +104,7 @@ def test_normalize_tags_removes_offset_tag_line_even_without_lyrics() -> None:
|
||||
|
||||
normalized = _normalize(raw)
|
||||
|
||||
assert normalized == ""
|
||||
assert normalized == "[offset:+500]"
|
||||
|
||||
|
||||
def test_is_synced_and_detect_sync_status_follow_non_zero_rule() -> None:
|
||||
@@ -140,7 +137,7 @@ def test_normalize_unsynced_covers_documented_blank_and_tag_rules() -> None:
|
||||
)
|
||||
|
||||
|
||||
def test_to_plain_duplicates_lines_by_leading_repeated_timestamps() -> None:
|
||||
def test_to_plain_duplicates_lines_for_multi_line_times() -> None:
|
||||
text = "\n".join(
|
||||
[
|
||||
"[00:02.00][00:01.00]hello",
|
||||
@@ -210,3 +207,113 @@ def test_reformat_pipeline_trims_outer_blanks_and_preserves_inner_blanks() -> No
|
||||
normalized = str(LRCData(text))
|
||||
|
||||
assert normalized == "[00:01.00]a\n\n[00:02.00]b"
|
||||
|
||||
|
||||
def test_single_doc_tag_line_is_not_added_to_lines() -> None:
|
||||
data = LRCData("[ar:Artist]\n[00:01.00]line")
|
||||
|
||||
assert data.tags == {"ar": "Artist"}
|
||||
assert len(data.lines) == 2
|
||||
assert isinstance(data.lines[0], DocTagLine)
|
||||
assert isinstance(data.lines[1], LyricLine)
|
||||
assert data.lines[1].text == "line"
|
||||
|
||||
|
||||
def test_multiple_doc_tags_on_one_line_are_plain_lyrics() -> None:
|
||||
data = LRCData("[ar:Artist][ti:Song]")
|
||||
|
||||
assert data.tags == {}
|
||||
assert len(data.lines) == 1
|
||||
assert data.lines[0].text == "[ar:Artist][ti:Song]"
|
||||
|
||||
|
||||
def test_doc_tag_after_lyrics_is_treated_as_lyrics() -> None:
|
||||
data = LRCData("[00:01.00]line\n[ar:Artist]")
|
||||
|
||||
assert data.tags == {"ar": "Artist"}
|
||||
assert len(data.lines) == 2
|
||||
assert isinstance(data.lines[1], DocTagLine)
|
||||
assert data.lines[1].text == "[ar:Artist]"
|
||||
|
||||
|
||||
def test_unknown_lines_before_lyrics_are_preserved_and_do_not_start_lyrics() -> None:
|
||||
data = LRCData("comment line\n[ar:Artist]\n[00:01.00]line")
|
||||
|
||||
assert data.tags == {"ar": "Artist"}
|
||||
assert len(data.lines) == 3
|
||||
assert isinstance(data.lines[0], LyricLine)
|
||||
assert isinstance(data.lines[1], DocTagLine)
|
||||
assert data.lines[2].text == "line"
|
||||
assert str(data).startswith("comment line\n[ar:Artist]\n")
|
||||
|
||||
|
||||
def test_to_plain_excludes_doc_tags_but_keeps_lyrics() -> None:
|
||||
data = LRCData("[ar:Artist]\n[00:01.00]line\n[ti:Song]\nplain")
|
||||
|
||||
assert data.to_plain() == "line"
|
||||
|
||||
|
||||
def test_non_space_between_line_tags_stops_tag_parsing() -> None:
|
||||
data = LRCData("[00:01.00]x[00:02.00]tail")
|
||||
|
||||
assert len(data.lines) == 1
|
||||
assert isinstance(data.lines[0], LyricLine)
|
||||
assert data.lines[0].line_times_ms == [1000]
|
||||
assert data.lines[0].text == "x[00:02.00]tail"
|
||||
|
||||
|
||||
def test_line_only_time_tag_is_valid_empty_lyric() -> None:
|
||||
data = LRCData("[00:01.00]")
|
||||
|
||||
assert len(data.lines) == 1
|
||||
assert isinstance(data.lines[0], LyricLine)
|
||||
assert data.lines[0].line_times_ms == [1000]
|
||||
assert data.lines[0].text == ""
|
||||
|
||||
|
||||
def test_model_uses_subclass_for_word_sync_lines() -> None:
|
||||
a = LRCData("[00:01.00]<00:00.50>lyric")
|
||||
b = LRCData("[00:01.00]lyric")
|
||||
|
||||
assert isinstance(a.lines[0], WordSyncLyricLine)
|
||||
assert isinstance(b.lines[0], LyricLine)
|
||||
assert not isinstance(b.lines[0], WordSyncLyricLine)
|
||||
|
||||
|
||||
def test_word_sync_line_with_empty_tail_keeps_word_tag_only_when_enabled() -> None:
|
||||
data = LRCData("[00:01.00]<00:02.00>")
|
||||
|
||||
assert isinstance(data.lines[0], WordSyncLyricLine)
|
||||
assert data.to_text(include_word_sync=False) == "[00:01.00]"
|
||||
assert data.to_text(include_word_sync=True) == "[00:01.00]<00:02.00>"
|
||||
|
||||
|
||||
def test_to_text_plain_true_matches_to_plain_output() -> None:
|
||||
data = LRCData("[00:02.00]b\n[00:01.00]a")
|
||||
|
||||
assert data.to_text(plain=True) == data.to_plain()
|
||||
|
||||
|
||||
def test_to_unsynced_converts_to_plain_based_unsynced_data() -> None:
|
||||
data = LRCData("[ar:Artist]\n[00:02.00]b\n[00:01.00]a")
|
||||
|
||||
unsynced = data.to_unsynced()
|
||||
|
||||
assert isinstance(unsynced, LRCData)
|
||||
assert str(unsynced) == "a\nb"
|
||||
|
||||
|
||||
def test_duplicate_doc_tag_key_last_value_wins_but_lines_are_kept() -> None:
|
||||
data = LRCData("[ar:First]\n[ar:Second]\n[00:01.00]line")
|
||||
|
||||
assert data.tags == {"ar": "Second"}
|
||||
assert len(data.lines) == 3
|
||||
assert isinstance(data.lines[0], DocTagLine)
|
||||
assert isinstance(data.lines[1], DocTagLine)
|
||||
assert str(data).startswith("[ar:First]\n[ar:Second]\n")
|
||||
|
||||
|
||||
def test_to_plain_for_doc_only_text_is_empty() -> None:
|
||||
data = LRCData("[ar:Artist]\n[ti:Song]")
|
||||
|
||||
assert data.to_plain() == ""
|
||||
|
||||
Reference in New Issue
Block a user