Compare commits

...

3 Commits

Author SHA1 Message Date
Uyanide 587d5dbe46 feat: normalize option 2026-04-08 08:01:41 +02:00
Uyanide 1e0f8e2868 fix: remove unused to_unsynced 2026-04-08 08:01:41 +02:00
Uyanide 573f8b5b8b test: update tests for lrc 2026-04-08 08:01:40 +02:00
5 changed files with 296 additions and 81 deletions
+1 -1
View File
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
[project] [project]
name = "lrx-cli" name = "lrx-cli"
version = "0.6.2" version = "0.6.3"
description = "Fetch line-synced lyrics for your music player." description = "Fetch line-synced lyrics for your music player."
readme = "README.md" readme = "README.md"
requires-python = ">=3.13" requires-python = ">=3.13"
+48 -6
View File
@@ -99,7 +99,17 @@ def fetch(
plain: Annotated[ plain: Annotated[
bool, bool,
cyclopts.Parameter( cyclopts.Parameter(
name="--plain", negative="", help="Output only the raw lyrics without tags." name="--plain",
negative="",
help="Output only plain lyrics without tags (highest priority over --normalize).",
),
] = False,
normalize: Annotated[
bool,
cyclopts.Parameter(
name="--normalize",
negative="",
help="Output normalized LRC (ignored when --plain is also set).",
), ),
] = False, ] = False,
): ):
@@ -123,7 +133,12 @@ def fetch(
logger.error("No lyrics found.") logger.error("No lyrics found.")
sys.exit(1) sys.exit(1)
print(result.lyrics.to_text(plain=plain)) if plain:
print(result.lyrics.to_plain())
elif normalize:
print(result.lyrics.to_normalized_text())
else:
print(result.lyrics.to_text())
# search # search
@@ -179,7 +194,17 @@ def search(
plain: Annotated[ plain: Annotated[
bool, bool,
cyclopts.Parameter( cyclopts.Parameter(
name="--plain", negative="", help="Output only the raw lyrics without tags." name="--plain",
negative="",
help="Output only plain lyrics without tags (highest priority over --normalize).",
),
] = False,
normalize: Annotated[
bool,
cyclopts.Parameter(
name="--normalize",
negative="",
help="Output normalized LRC (ignored when --plain is also set).",
), ),
] = False, ] = False,
): ):
@@ -214,7 +239,12 @@ def search(
logger.error("No lyrics found.") logger.error("No lyrics found.")
sys.exit(1) sys.exit(1)
print(result.lyrics.to_text(plain=plain)) if plain:
print(result.lyrics.to_plain())
elif normalize:
print(result.lyrics.to_normalized_text())
else:
print(result.lyrics.to_text())
# export # export
@@ -253,7 +283,17 @@ def export(
plain: Annotated[ plain: Annotated[
bool, bool,
cyclopts.Parameter( cyclopts.Parameter(
name="--plain", negative="", help="Export only the raw lyrics without tags." name="--plain",
negative="",
help="Export only plain lyrics (.txt, highest priority over --normalize).",
),
] = False,
normalize: Annotated[
bool,
cyclopts.Parameter(
name="--normalize",
negative="",
help="Export normalized LRC output (ignored when --plain is also set).",
), ),
] = False, ] = False,
): ):
@@ -307,8 +347,10 @@ def export(
with open(output, "w", encoding="utf-8") as f: with open(output, "w", encoding="utf-8") as f:
if plain: if plain:
f.write(result.lyrics.to_plain()) f.write(result.lyrics.to_plain())
elif normalize:
f.write(result.lyrics.to_normalized_text())
else: else:
f.write(str(result.lyrics)) f.write(result.lyrics.to_text())
logger.info(f"Exported lyrics to {output}") logger.info(f"Exported lyrics to {output}")
except Exception as e: except Exception as e:
logger.error(f"Failed to write file: {e}") logger.error(f"Failed to write file: {e}")
+80 -14
View File
@@ -233,6 +233,14 @@ def _is_single_doc_tag_line(line: str) -> Optional[tuple[str, str]]:
return key, value return key, value
def _parse_offset_value(value: str) -> Optional[int]:
"""Parse doc offset value in milliseconds, returning None for invalid values."""
try:
return int(value.strip())
except ValueError:
return None
class LRCData: class LRCData:
_lines: list[BaseLine] _lines: list[BaseLine]
_doc_tags: dict[str, str] _doc_tags: dict[str, str]
@@ -265,7 +273,7 @@ class LRCData:
self._lines = parsed self._lines = parsed
def __str__(self) -> str: def __str__(self) -> str:
return self.to_text(plain=False, include_word_sync=False) return self._serialize_lines(self._lines, include_word_sync=True)
def __repr__(self) -> str: def __repr__(self) -> str:
return f"LRCData(doc_tags={self._doc_tags!r}, lines={self._lines!r})" return f"LRCData(doc_tags={self._doc_tags!r}, lines={self._lines!r})"
@@ -329,6 +337,59 @@ class LRCData:
ret._doc_tags = dict(self._doc_tags) ret._doc_tags = dict(self._doc_tags)
return ret return ret
def normalize(self) -> "LRCData":
"""Normalize LRC for decode/export oriented output.
Rules:
- Move all doc tags to the beginning, preserving line order and duplicates.
- Keep doc tags unchanged except removing all offset tags.
- Remove word-sync tags.
- Convert untagged non-empty lyric lines to [00:00.00] lyrics.
- Drop empty lyric lines.
- Expand lyric lines with multiple time tags into one line per tag.
- Apply offset (ms) to lyric timestamps and sort by timestamp.
"""
out_doc_tags: list[DocTagLine] = []
lyric_entries: list[tuple[int, str]] = []
offset_ms = 0
# Resolve offset first so it applies to all lyric lines, independent of tag position.
for line in self._lines:
if isinstance(line, DocTagLine) and line.key.strip().lower() == "offset":
parsed_offset = _parse_offset_value(line.value)
if parsed_offset is not None:
offset_ms = parsed_offset
for line in self._lines:
if isinstance(line, DocTagLine):
if line.key.strip().lower() == "offset":
continue
out_doc_tags.append(DocTagLine(key=line.key, value=line.value))
continue
assert isinstance(line, LyricLine)
lyric_text = line.text
if not lyric_text.strip():
continue
line_times = line.line_times_ms if line.line_times_ms else [0]
for time_ms in line_times:
shifted = max(0, time_ms + offset_ms)
lyric_entries.append((shifted, lyric_text))
lyric_entries.sort(key=lambda item: item[0])
out_lyrics: list[LyricLine] = [
LyricLine(line_times_ms=[time_ms], words=[LrcWordSegment(text=text)])
for time_ms, text in lyric_entries
]
ret = LRCData()
ret._lines = [*out_doc_tags, *out_lyrics]
ret._doc_tags = {line.key: line.value for line in out_doc_tags}
return ret
def to_plain( def to_plain(
self, self,
deduplicate: bool = False, deduplicate: bool = False,
@@ -366,27 +427,32 @@ class LRCData:
return "\n".join(sorted_lines).strip() return "\n".join(sorted_lines).strip()
def to_unsynced(self) -> "LRCData": @staticmethod
"""Return a plain-text based unsynced representation.""" def _serialize_lines(lines: list[BaseLine], include_word_sync: bool) -> str:
return LRCData(self.to_plain()) return "\n".join(
line.to_text(include_word_sync=include_word_sync) for line in lines
)
def to_text( def to_text(
self, self,
plain: bool = False,
include_word_sync: bool = False, include_word_sync: bool = False,
) -> str: ) -> str:
"""Serialize to LRC text or plain text. """Serialize to non-normalized LRC text.
- plain=True returns to_plain(). - Unsynced lyrics are converted to [00:00.00]-tagged form.
- include_word_sync controls rendering of per-word tags for word-sync lines. - include_word_sync only controls rendering of per-word tags.
- This method does not apply normalize() rules.
""" """
if plain: res = self if self.is_synced() else self.normalize_unsynced()
return self.to_plain(deduplicate=False) return self._serialize_lines(res._lines, include_word_sync=include_word_sync)
lines: list[str] = [ def to_normalized_text(self) -> str:
line.to_text(include_word_sync=include_word_sync) for line in self._lines """Serialize using normalize() rules.
]
return "\n".join(lines) Normalized output always strips word-sync tags.
"""
normalized = self.normalize()
return self._serialize_lines(normalized._lines, include_word_sync=False)
def get_audio_path(audio_url: str, ensure_exists: bool = False) -> Optional[Path]: def get_audio_path(audio_url: str, ensure_exists: bool = False) -> Optional[Path]:
+166 -59
View File
@@ -1,11 +1,6 @@
from __future__ import annotations from __future__ import annotations
from lrx_cli.lrc import ( from lrx_cli.lrc import LRCData
LRCData,
DocTagLine,
LyricLine,
WordSyncLyricLine,
)
from lrx_cli.models import CacheStatus from lrx_cli.models import CacheStatus
@@ -65,14 +60,6 @@ def test_midline_line_tags_are_kept_as_plain_text() -> None:
assert normalized == "[00:01.00]Lyric [00:02.00]line" assert normalized == "[00:01.00]Lyric [00:02.00]line"
def test_normalize_tags_applies_positive_and_negative_offset_per_spec() -> None:
positive = _normalize("[offset:+1000]\n[00:10.00]line")
negative = _normalize("[offset:-500]\n[00:10.00]line")
assert positive == "[offset:+1000]\n[00:10.00]line"
assert negative == "[offset:-500]\n[00:10.00]line"
def test_leading_spaces_before_first_time_tag_are_trimmed() -> None: def test_leading_spaces_before_first_time_tag_are_trimmed() -> None:
raw = "\t [00:01.2] hello" raw = "\t [00:01.2] hello"
@@ -86,9 +73,8 @@ def test_normalize_tags_handles_consecutive_start_tags_with_spaces_between() ->
data = LRCData(raw) data = LRCData(raw)
assert len(data.lines) == 1 assert len(data.lines) == 1
assert isinstance(data.lines[0], LyricLine) assert str(data) == "[00:01.00][00:02.30]chorus"
assert data.lines[0].line_times_ms == [1000, 2300] assert data.to_plain() == "chorus\nchorus"
assert data.lines[0].text == "chorus"
def test_non_leading_time_like_text_is_plain_lyric() -> None: def test_non_leading_time_like_text_is_plain_lyric() -> None:
@@ -99,14 +85,6 @@ def test_non_leading_time_like_text_is_plain_lyric() -> None:
assert normalized == "intro [00:01]line" assert normalized == "intro [00:01]line"
def test_normalize_tags_removes_offset_tag_line_even_without_lyrics() -> None:
raw = "[offset:+500]"
normalized = _normalize(raw)
assert normalized == "[offset:+500]"
def test_is_synced_and_detect_sync_status_follow_non_zero_rule() -> None: def test_is_synced_and_detect_sync_status_follow_non_zero_rule() -> None:
plain_text = "just some lyrics\nwithout tags" plain_text = "just some lyrics\nwithout tags"
unsynced_text = "[00:00.00]a\n[00:00.00]b" unsynced_text = "[00:00.00]a\n[00:00.00]b"
@@ -137,6 +115,117 @@ def test_normalize_unsynced_covers_documented_blank_and_tag_rules() -> None:
) )
def test_normalize_unsynced_preserves_doc_tags_and_middle_blanks() -> None:
text = "\n".join(["[ar:Artist]", "", "[00:03.00]line", "[ti:Song]", "", " tail "])
normalized = LRCData(text).normalize_unsynced()
assert normalized.tags == {"ar": "Artist", "ti": "Song"}
assert str(normalized) == "\n".join(
[
"[ar:Artist]",
"[00:00.00]line",
"[ti:Song]",
"[00:00.00]",
"[00:00.00]tail",
]
)
def test_normalize_unsynced_strips_word_sync_markup_from_lyric_text() -> None:
text = "[00:02.00]<00:01.00>he <00:01.50>llo"
normalized = str(LRCData(text).normalize_unsynced())
assert normalized == "[00:00.00]he llo"
def test_normalize_unsynced_result_is_always_unsynced() -> None:
text = "[00:05.00]a\n[00:10.00]b"
normalized = LRCData(text).normalize_unsynced()
assert normalized.is_synced() is False
assert normalized.detect_sync_status() is CacheStatus.SUCCESS_UNSYNCED
def test_normalize_moves_doc_tags_to_top_and_removes_offset_tag() -> None:
text = "\n".join(
[
"[00:02.00]b",
"[ar:Artist]",
"[offset:500]",
"[00:01.00]a",
"[ti:Song]",
]
)
normalized = LRCData(text).to_normalized_text()
assert normalized == "\n".join(
[
"[ar:Artist]",
"[ti:Song]",
"[00:01.50]a",
"[00:02.50]b",
]
)
def test_normalize_expands_multi_time_tags_and_sorts_lyrics() -> None:
text = "\n".join(
[
"[00:03.00]c",
"[00:02.00][00:01.00]x",
]
)
normalized = LRCData(text).to_normalized_text()
assert normalized == "\n".join(["[00:01.00]x", "[00:02.00]x", "[00:03.00]c"])
def test_normalize_converts_unsynced_lines_and_removes_word_sync_tags() -> None:
text = "\n".join(
[
"plain",
"<00:01.00>he <00:01.50>llo",
"[00:02.00]<00:02.20>world",
"",
]
)
normalized = LRCData(text).to_normalized_text()
assert normalized == "\n".join(
[
"[00:00.00]plain",
"[00:00.00]he llo",
"[00:02.00]world",
]
)
def test_to_normalized_text_is_separate_from_plain() -> None:
data = LRCData("[offset:500]\n[00:02.00]b\n[00:01.00]a")
assert data.to_plain() == "a\nb"
assert data.to_normalized_text() == "[00:01.50]a\n[00:02.50]b"
def test_to_text_default_forces_unsynced_tagging() -> None:
data = LRCData("line\nother")
assert data.to_text() == "[00:00.00]line\n[00:00.00]other"
def test_str_keeps_raw_unsynced_while_to_text_adds_unsynced_tags() -> None:
data = LRCData("line\nother")
assert str(data) == "line\nother"
assert data.to_text() == "[00:00.00]line\n[00:00.00]other"
def test_to_plain_duplicates_lines_for_multi_line_times() -> None: def test_to_plain_duplicates_lines_for_multi_line_times() -> None:
text = "\n".join( text = "\n".join(
[ [
@@ -209,14 +298,13 @@ def test_reformat_pipeline_trims_outer_blanks_and_preserves_inner_blanks() -> No
assert normalized == "[00:01.00]a\n\n[00:02.00]b" assert normalized == "[00:01.00]a\n\n[00:02.00]b"
def test_single_doc_tag_line_is_not_added_to_lines() -> None: def test_single_doc_tag_line_is_preserved_and_registered() -> None:
data = LRCData("[ar:Artist]\n[00:01.00]line") data = LRCData("[ar:Artist]\n[00:01.00]line")
assert data.tags == {"ar": "Artist"} assert data.tags == {"ar": "Artist"}
assert len(data.lines) == 2 assert len(data.lines) == 2
assert isinstance(data.lines[0], DocTagLine) assert str(data) == "[ar:Artist]\n[00:01.00]line"
assert isinstance(data.lines[1], LyricLine) assert data.to_plain() == "line"
assert data.lines[1].text == "line"
def test_multiple_doc_tags_on_one_line_are_plain_lyrics() -> None: def test_multiple_doc_tags_on_one_line_are_plain_lyrics() -> None:
@@ -232,8 +320,8 @@ def test_doc_tag_after_lyrics_is_treated_as_lyrics() -> None:
assert data.tags == {"ar": "Artist"} assert data.tags == {"ar": "Artist"}
assert len(data.lines) == 2 assert len(data.lines) == 2
assert isinstance(data.lines[1], DocTagLine) assert str(data) == "[00:01.00]line\n[ar:Artist]"
assert data.lines[1].text == "[ar:Artist]" assert data.to_plain() == "line"
def test_unknown_lines_before_lyrics_are_preserved_and_do_not_start_lyrics() -> None: def test_unknown_lines_before_lyrics_are_preserved_and_do_not_start_lyrics() -> None:
@@ -241,10 +329,8 @@ def test_unknown_lines_before_lyrics_are_preserved_and_do_not_start_lyrics() ->
assert data.tags == {"ar": "Artist"} assert data.tags == {"ar": "Artist"}
assert len(data.lines) == 3 assert len(data.lines) == 3
assert isinstance(data.lines[0], LyricLine) assert str(data) == "comment line\n[ar:Artist]\n[00:01.00]line"
assert isinstance(data.lines[1], DocTagLine) assert data.to_plain() == "line"
assert data.lines[2].text == "line"
assert str(data).startswith("comment line\n[ar:Artist]\n")
def test_to_plain_excludes_doc_tags_but_keeps_lyrics() -> None: def test_to_plain_excludes_doc_tags_but_keeps_lyrics() -> None:
@@ -257,50 +343,73 @@ def test_non_space_between_line_tags_stops_tag_parsing() -> None:
data = LRCData("[00:01.00]x[00:02.00]tail") data = LRCData("[00:01.00]x[00:02.00]tail")
assert len(data.lines) == 1 assert len(data.lines) == 1
assert isinstance(data.lines[0], LyricLine) assert str(data) == "[00:01.00]x[00:02.00]tail"
assert data.lines[0].line_times_ms == [1000] assert data.to_plain() == "x[00:02.00]tail"
assert data.lines[0].text == "x[00:02.00]tail"
def test_line_only_time_tag_is_valid_empty_lyric() -> None: def test_line_only_time_tag_is_valid_empty_lyric() -> None:
data = LRCData("[00:01.00]") data = LRCData("[00:01.00]")
assert len(data.lines) == 1 assert len(data.lines) == 1
assert isinstance(data.lines[0], LyricLine) assert str(data) == "[00:01.00]"
assert data.lines[0].line_times_ms == [1000] assert data.to_plain() == ""
assert data.lines[0].text == ""
def test_model_uses_subclass_for_word_sync_lines() -> None: def test_word_sync_markup_only_changes_output_when_enabled() -> None:
a = LRCData("[00:01.00]<00:00.50>lyric") a = LRCData("[00:01.00]<00:00.50>lyric")
b = LRCData("[00:01.00]lyric") b = LRCData("[00:01.00]lyric")
assert isinstance(a.lines[0], WordSyncLyricLine) assert a.to_text(include_word_sync=False) == "[00:01.00]lyric"
assert isinstance(b.lines[0], LyricLine) assert b.to_text(include_word_sync=False) == "[00:01.00]lyric"
assert not isinstance(b.lines[0], WordSyncLyricLine) assert a.to_text(include_word_sync=True) == "[00:01.00]<00:00.50>lyric"
assert b.to_text(include_word_sync=True) == "[00:01.00]lyric"
def test_str_preserves_word_sync_markup() -> None:
data = LRCData("[00:01.00]<00:00.50>lyric")
assert str(data) == "[00:01.00]<00:00.50>lyric"
assert data.to_text(include_word_sync=False) == "[00:01.00]lyric"
def test_str_preserves_offset_tag_and_does_not_apply_it() -> None:
data = LRCData("[offset:500]\n[00:01.00]a")
assert str(data) == "[offset:500]\n[00:01.00]a"
assert data.to_normalized_text() == "[00:01.50]a"
def test_str_preserves_doc_tag_order_and_duplicates_exactly() -> None:
data = LRCData("[ar:First]\n[ti:Song]\n[ar:Second]\n[00:01.00]line")
assert str(data) == "[ar:First]\n[ti:Song]\n[ar:Second]\n[00:01.00]line"
def test_str_does_not_expand_or_sort_multi_time_lines() -> None:
data = LRCData("[00:03.00]c\n[00:02.00][00:01.00]x")
assert str(data) == "[00:03.00]c\n[00:02.00][00:01.00]x"
assert data.to_normalized_text() == "[00:01.00]x\n[00:02.00]x\n[00:03.00]c"
def test_str_preserves_plain_text_lines_without_injecting_time_tags() -> None:
data = LRCData("plain line\n[ar:Artist]\nother line")
assert str(data) == "plain line\n[ar:Artist]\nother line"
assert data.to_text() == "[00:00.00]plain line\n[ar:Artist]\n[00:00.00]other line"
def test_word_sync_line_with_empty_tail_keeps_word_tag_only_when_enabled() -> None: def test_word_sync_line_with_empty_tail_keeps_word_tag_only_when_enabled() -> None:
data = LRCData("[00:01.00]<00:02.00>") data = LRCData("[00:01.00]<00:02.00>")
assert isinstance(data.lines[0], WordSyncLyricLine)
assert data.to_text(include_word_sync=False) == "[00:01.00]" assert data.to_text(include_word_sync=False) == "[00:01.00]"
assert data.to_text(include_word_sync=True) == "[00:01.00]<00:02.00>" assert data.to_text(include_word_sync=True) == "[00:01.00]<00:02.00>"
def test_to_text_plain_true_matches_to_plain_output() -> None: def test_to_plain_outputs_sorted_plain_text() -> None:
data = LRCData("[00:02.00]b\n[00:01.00]a") data = LRCData("[00:02.00]b\n[00:01.00]a")
assert data.to_text(plain=True) == data.to_plain() assert data.to_plain() == "a\nb"
def test_to_unsynced_converts_to_plain_based_unsynced_data() -> None:
data = LRCData("[ar:Artist]\n[00:02.00]b\n[00:01.00]a")
unsynced = data.to_unsynced()
assert isinstance(unsynced, LRCData)
assert str(unsynced) == "a\nb"
def test_duplicate_doc_tag_key_last_value_wins_but_lines_are_kept() -> None: def test_duplicate_doc_tag_key_last_value_wins_but_lines_are_kept() -> None:
@@ -308,8 +417,6 @@ def test_duplicate_doc_tag_key_last_value_wins_but_lines_are_kept() -> None:
assert data.tags == {"ar": "Second"} assert data.tags == {"ar": "Second"}
assert len(data.lines) == 3 assert len(data.lines) == 3
assert isinstance(data.lines[0], DocTagLine)
assert isinstance(data.lines[1], DocTagLine)
assert str(data).startswith("[ar:First]\n[ar:Second]\n") assert str(data).startswith("[ar:First]\n[ar:Second]\n")
Generated
+1 -1
View File
@@ -153,7 +153,7 @@ wheels = [
[[package]] [[package]]
name = "lrx-cli" name = "lrx-cli"
version = "0.6.2" version = "0.6.3"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
{ name = "cyclopts" }, { name = "cyclopts" },