feat: normalize option
This commit is contained in:
+1
-1
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "lrx-cli"
|
name = "lrx-cli"
|
||||||
version = "0.6.2"
|
version = "0.6.3"
|
||||||
description = "Fetch line-synced lyrics for your music player."
|
description = "Fetch line-synced lyrics for your music player."
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.13"
|
requires-python = ">=3.13"
|
||||||
|
|||||||
+48
-6
@@ -99,7 +99,17 @@ def fetch(
|
|||||||
plain: Annotated[
|
plain: Annotated[
|
||||||
bool,
|
bool,
|
||||||
cyclopts.Parameter(
|
cyclopts.Parameter(
|
||||||
name="--plain", negative="", help="Output only the raw lyrics without tags."
|
name="--plain",
|
||||||
|
negative="",
|
||||||
|
help="Output only plain lyrics without tags (highest priority over --normalize).",
|
||||||
|
),
|
||||||
|
] = False,
|
||||||
|
normalize: Annotated[
|
||||||
|
bool,
|
||||||
|
cyclopts.Parameter(
|
||||||
|
name="--normalize",
|
||||||
|
negative="",
|
||||||
|
help="Output normalized LRC (ignored when --plain is also set).",
|
||||||
),
|
),
|
||||||
] = False,
|
] = False,
|
||||||
):
|
):
|
||||||
@@ -123,7 +133,12 @@ def fetch(
|
|||||||
logger.error("No lyrics found.")
|
logger.error("No lyrics found.")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
print(result.lyrics.to_text(plain=plain))
|
if plain:
|
||||||
|
print(result.lyrics.to_plain())
|
||||||
|
elif normalize:
|
||||||
|
print(result.lyrics.to_normalized_text())
|
||||||
|
else:
|
||||||
|
print(result.lyrics.to_text())
|
||||||
|
|
||||||
|
|
||||||
# search
|
# search
|
||||||
@@ -179,7 +194,17 @@ def search(
|
|||||||
plain: Annotated[
|
plain: Annotated[
|
||||||
bool,
|
bool,
|
||||||
cyclopts.Parameter(
|
cyclopts.Parameter(
|
||||||
name="--plain", negative="", help="Output only the raw lyrics without tags."
|
name="--plain",
|
||||||
|
negative="",
|
||||||
|
help="Output only plain lyrics without tags (highest priority over --normalize).",
|
||||||
|
),
|
||||||
|
] = False,
|
||||||
|
normalize: Annotated[
|
||||||
|
bool,
|
||||||
|
cyclopts.Parameter(
|
||||||
|
name="--normalize",
|
||||||
|
negative="",
|
||||||
|
help="Output normalized LRC (ignored when --plain is also set).",
|
||||||
),
|
),
|
||||||
] = False,
|
] = False,
|
||||||
):
|
):
|
||||||
@@ -214,7 +239,12 @@ def search(
|
|||||||
logger.error("No lyrics found.")
|
logger.error("No lyrics found.")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
print(result.lyrics.to_text(plain=plain))
|
if plain:
|
||||||
|
print(result.lyrics.to_plain())
|
||||||
|
elif normalize:
|
||||||
|
print(result.lyrics.to_normalized_text())
|
||||||
|
else:
|
||||||
|
print(result.lyrics.to_text())
|
||||||
|
|
||||||
|
|
||||||
# export
|
# export
|
||||||
@@ -253,7 +283,17 @@ def export(
|
|||||||
plain: Annotated[
|
plain: Annotated[
|
||||||
bool,
|
bool,
|
||||||
cyclopts.Parameter(
|
cyclopts.Parameter(
|
||||||
name="--plain", negative="", help="Export only the raw lyrics without tags."
|
name="--plain",
|
||||||
|
negative="",
|
||||||
|
help="Export only plain lyrics (.txt, highest priority over --normalize).",
|
||||||
|
),
|
||||||
|
] = False,
|
||||||
|
normalize: Annotated[
|
||||||
|
bool,
|
||||||
|
cyclopts.Parameter(
|
||||||
|
name="--normalize",
|
||||||
|
negative="",
|
||||||
|
help="Export normalized LRC output (ignored when --plain is also set).",
|
||||||
),
|
),
|
||||||
] = False,
|
] = False,
|
||||||
):
|
):
|
||||||
@@ -307,8 +347,10 @@ def export(
|
|||||||
with open(output, "w", encoding="utf-8") as f:
|
with open(output, "w", encoding="utf-8") as f:
|
||||||
if plain:
|
if plain:
|
||||||
f.write(result.lyrics.to_plain())
|
f.write(result.lyrics.to_plain())
|
||||||
|
elif normalize:
|
||||||
|
f.write(result.lyrics.to_normalized_text())
|
||||||
else:
|
else:
|
||||||
f.write(str(result.lyrics))
|
f.write(result.lyrics.to_text())
|
||||||
logger.info(f"Exported lyrics to {output}")
|
logger.info(f"Exported lyrics to {output}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to write file: {e}")
|
logger.error(f"Failed to write file: {e}")
|
||||||
|
|||||||
+82
-12
@@ -233,6 +233,14 @@ def _is_single_doc_tag_line(line: str) -> Optional[tuple[str, str]]:
|
|||||||
return key, value
|
return key, value
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_offset_value(value: str) -> Optional[int]:
|
||||||
|
"""Parse doc offset value in milliseconds, returning None for invalid values."""
|
||||||
|
try:
|
||||||
|
return int(value.strip())
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
class LRCData:
|
class LRCData:
|
||||||
_lines: list[BaseLine]
|
_lines: list[BaseLine]
|
||||||
_doc_tags: dict[str, str]
|
_doc_tags: dict[str, str]
|
||||||
@@ -265,7 +273,7 @@ class LRCData:
|
|||||||
self._lines = parsed
|
self._lines = parsed
|
||||||
|
|
||||||
def __str__(self) -> str:
|
def __str__(self) -> str:
|
||||||
return self.to_text(plain=False, include_word_sync=False)
|
return self._serialize_lines(self._lines, include_word_sync=True)
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
return f"LRCData(doc_tags={self._doc_tags!r}, lines={self._lines!r})"
|
return f"LRCData(doc_tags={self._doc_tags!r}, lines={self._lines!r})"
|
||||||
@@ -293,7 +301,7 @@ class LRCData:
|
|||||||
else CacheStatus.SUCCESS_UNSYNCED
|
else CacheStatus.SUCCESS_UNSYNCED
|
||||||
)
|
)
|
||||||
|
|
||||||
def normalize_unsynced(self):
|
def normalize_unsynced(self) -> "LRCData":
|
||||||
"""Convert lyrics into unsynced LRC form with [00:00.00] tags.
|
"""Convert lyrics into unsynced LRC form with [00:00.00] tags.
|
||||||
|
|
||||||
- Leading blank lyric lines are skipped.
|
- Leading blank lyric lines are skipped.
|
||||||
@@ -329,6 +337,59 @@ class LRCData:
|
|||||||
ret._doc_tags = dict(self._doc_tags)
|
ret._doc_tags = dict(self._doc_tags)
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
def normalize(self) -> "LRCData":
    """Return a normalized copy of the lyrics for decode/export oriented output.

    Rules:
    - Move all doc tags to the beginning, preserving line order and duplicates.
    - Keep doc tags unchanged except removing all offset tags.
    - Remove word-sync tags.
    - Convert untagged non-empty lyric lines to [00:00.00] lyrics. The offset
      is not applied to them because they carry no real timestamp.
    - Drop empty lyric lines.
    - Expand lyric lines with multiple time tags into one line per tag.
    - Apply offset (ms) to lyric timestamps, clamp the result at zero, and
      sort by timestamp; lines with equal timestamps keep their source order.

    When several offset tags are present, the last one with a valid integer
    value is used; offset tags with invalid values are ignored.

    Returns:
        A new LRCData built from copies of the doc tags and freshly created
        single-timestamp lyric lines.
    """
    offset_ms = 0
    doc_tags: list[DocTagLine] = []
    # (timestamp, text) in source order; timestamp is None for untagged lines.
    raw_entries: list[tuple[Optional[int], str]] = []

    for line in self._lines:
        if isinstance(line, DocTagLine):
            if line.key.strip().lower() == "offset":
                # Offset tags are consumed here and never emitted.
                parsed = _parse_offset_value(line.value)
                if parsed is not None:
                    offset_ms = parsed
            else:
                doc_tags.append(DocTagLine(key=line.key, value=line.value))
            continue

        assert isinstance(line, LyricLine)

        text = line.text
        if not text.strip():
            continue

        if line.line_times_ms:
            raw_entries.extend((time_ms, text) for time_ms in line.line_times_ms)
        else:
            raw_entries.append((None, text))

    # The offset is applied only after the full scan so it affects every timed
    # line regardless of where the offset tag sits in the file.
    # NOTE(review): the offset is added to each timestamp; some LRC players
    # treat a positive offset as "lyrics appear sooner" -- confirm the sign.
    entries = [
        (0 if time_ms is None else max(0, time_ms + offset_ms), text)
        for time_ms, text in raw_entries
    ]
    # list.sort is stable, so equal timestamps keep their source order.
    entries.sort(key=lambda item: item[0])

    lyrics: list[LyricLine] = [
        LyricLine(line_times_ms=[time_ms], words=[LrcWordSegment(text=text)])
        for time_ms, text in entries
    ]

    ret = LRCData()
    ret._lines = [*doc_tags, *lyrics]
    # For duplicate keys the dict keeps the last value; the lines keep them all.
    ret._doc_tags = {tag.key: tag.value for tag in doc_tags}
    return ret
|
||||||
|
|
||||||
def to_plain(
|
def to_plain(
|
||||||
self,
|
self,
|
||||||
deduplicate: bool = False,
|
deduplicate: bool = False,
|
||||||
@@ -366,23 +427,32 @@ class LRCData:
|
|||||||
|
|
||||||
return "\n".join(sorted_lines).strip()
|
return "\n".join(sorted_lines).strip()
|
||||||
|
|
||||||
|
@staticmethod
def _serialize_lines(lines: list[BaseLine], include_word_sync: bool) -> str:
    """Render every line with ``to_text`` and join the results with newlines."""
    rendered = [
        entry.to_text(include_word_sync=include_word_sync) for entry in lines
    ]
    return "\n".join(rendered)
|
||||||
|
|
||||||
def to_text(
|
def to_text(
|
||||||
self,
|
self,
|
||||||
plain: bool = False,
|
|
||||||
include_word_sync: bool = False,
|
include_word_sync: bool = False,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Serialize to LRC text or plain text.
|
"""Serialize to non-normalized LRC text.
|
||||||
|
|
||||||
- plain=True returns to_plain().
|
- Unsynced lyrics are converted to [00:00.00]-tagged form.
|
||||||
- include_word_sync controls rendering of per-word tags for word-sync lines.
|
- include_word_sync only controls rendering of per-word tags.
|
||||||
|
- This method does not apply normalize() rules.
|
||||||
"""
|
"""
|
||||||
if plain:
|
res = self if self.is_synced() else self.normalize_unsynced()
|
||||||
return self.to_plain(deduplicate=False)
|
return self._serialize_lines(res._lines, include_word_sync=include_word_sync)
|
||||||
|
|
||||||
lines: list[str] = [
|
def to_normalized_text(self) -> str:
|
||||||
line.to_text(include_word_sync=include_word_sync) for line in self._lines
|
"""Serialize using normalize() rules.
|
||||||
]
|
|
||||||
return "\n".join(lines)
|
Normalized output always strips word-sync tags.
|
||||||
|
"""
|
||||||
|
normalized = self.normalize()
|
||||||
|
return self._serialize_lines(normalized._lines, include_word_sync=False)
|
||||||
|
|
||||||
|
|
||||||
def get_audio_path(audio_url: str, ensure_exists: bool = False) -> Optional[Path]:
|
def get_audio_path(audio_url: str, ensure_exists: bool = False) -> Optional[Path]:
|
||||||
|
|||||||
+113
-2
@@ -149,6 +149,83 @@ def test_normalize_unsynced_result_is_always_unsynced() -> None:
|
|||||||
assert normalized.detect_sync_status() is CacheStatus.SUCCESS_UNSYNCED
|
assert normalized.detect_sync_status() is CacheStatus.SUCCESS_UNSYNCED
|
||||||
|
|
||||||
|
|
||||||
|
def test_normalize_moves_doc_tags_to_top_and_removes_offset_tag() -> None:
    """Doc tags come first, the offset tag is dropped and its value is applied."""
    source_lines = [
        "[00:02.00]b",
        "[ar:Artist]",
        "[offset:500]",
        "[00:01.00]a",
        "[ti:Song]",
    ]
    expected_lines = [
        "[ar:Artist]",
        "[ti:Song]",
        "[00:01.50]a",
        "[00:02.50]b",
    ]

    rendered = LRCData("\n".join(source_lines)).to_normalized_text()

    assert rendered == "\n".join(expected_lines)
|
||||||
|
|
||||||
|
|
||||||
|
def test_normalize_expands_multi_time_tags_and_sorts_lyrics() -> None:
    """A line with several time tags becomes one line per tag, ordered by time."""
    source = "\n".join(
        [
            "[00:03.00]c",
            "[00:02.00][00:01.00]x",
        ]
    )
    expected = "\n".join(["[00:01.00]x", "[00:02.00]x", "[00:03.00]c"])

    assert LRCData(source).to_normalized_text() == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_normalize_converts_unsynced_lines_and_removes_word_sync_tags() -> None:
    """Untagged lines get [00:00.00], word tags are stripped, blanks are dropped."""
    source_lines = [
        "plain",
        "<00:01.00>he <00:01.50>llo",
        "[00:02.00]<00:02.20>world",
        "",
    ]
    expected_lines = [
        "[00:00.00]plain",
        "[00:00.00]he llo",
        "[00:02.00]world",
    ]

    rendered = LRCData("\n".join(source_lines)).to_normalized_text()

    assert rendered == "\n".join(expected_lines)
|
||||||
|
|
||||||
|
|
||||||
|
def test_to_normalized_text_is_separate_from_plain() -> None:
    """Plain output has no tags, while normalized output keeps shifted time tags."""
    lyrics = LRCData("[offset:500]\n[00:02.00]b\n[00:01.00]a")

    plain_text = lyrics.to_plain()
    normalized_text = lyrics.to_normalized_text()

    assert plain_text == "a\nb"
    assert normalized_text == "[00:01.50]a\n[00:02.50]b"
|
||||||
|
|
||||||
|
|
||||||
|
def test_to_text_default_forces_unsynced_tagging() -> None:
    """to_text() with default arguments prefixes untagged lines with [00:00.00]."""
    lyrics = LRCData("line\nother")
    expected = "[00:00.00]line\n[00:00.00]other"

    assert lyrics.to_text() == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_str_keeps_raw_unsynced_while_to_text_adds_unsynced_tags() -> None:
    """str() returns untagged lines as-is; to_text() adds [00:00.00] tags."""
    lyrics = LRCData("line\nother")

    raw_text = str(lyrics)
    tagged_text = lyrics.to_text()

    assert raw_text == "line\nother"
    assert tagged_text == "[00:00.00]line\n[00:00.00]other"
|
||||||
|
|
||||||
|
|
||||||
def test_to_plain_duplicates_lines_for_multi_line_times() -> None:
|
def test_to_plain_duplicates_lines_for_multi_line_times() -> None:
|
||||||
text = "\n".join(
|
text = "\n".join(
|
||||||
[
|
[
|
||||||
@@ -288,6 +365,40 @@ def test_word_sync_markup_only_changes_output_when_enabled() -> None:
|
|||||||
assert b.to_text(include_word_sync=True) == "[00:01.00]lyric"
|
assert b.to_text(include_word_sync=True) == "[00:01.00]lyric"
|
||||||
|
|
||||||
|
|
||||||
|
def test_str_preserves_word_sync_markup() -> None:
    """str() keeps per-word tags; to_text(include_word_sync=False) drops them."""
    lyrics = LRCData("[00:01.00]<00:00.50>lyric")

    raw_text = str(lyrics)
    stripped_text = lyrics.to_text(include_word_sync=False)

    assert raw_text == "[00:01.00]<00:00.50>lyric"
    assert stripped_text == "[00:01.00]lyric"
|
||||||
|
|
||||||
|
|
||||||
|
def test_str_preserves_offset_tag_and_does_not_apply_it() -> None:
    """str() keeps the offset tag unapplied; normalized text applies and drops it."""
    lyrics = LRCData("[offset:500]\n[00:01.00]a")

    raw_text = str(lyrics)
    normalized_text = lyrics.to_normalized_text()

    assert raw_text == "[offset:500]\n[00:01.00]a"
    assert normalized_text == "[00:01.50]a"
|
||||||
|
|
||||||
|
|
||||||
|
def test_str_preserves_doc_tag_order_and_duplicates_exactly() -> None:
    """str() round-trips doc tags in source order, including a repeated key."""
    source = "[ar:First]\n[ti:Song]\n[ar:Second]\n[00:01.00]line"
    lyrics = LRCData(source)

    assert str(lyrics) == "[ar:First]\n[ti:Song]\n[ar:Second]\n[00:01.00]line"
|
||||||
|
|
||||||
|
|
||||||
|
def test_str_does_not_expand_or_sort_multi_time_lines() -> None:
    """str() leaves multi-tag lines intact; normalized text expands and sorts them."""
    lyrics = LRCData("[00:03.00]c\n[00:02.00][00:01.00]x")

    raw_text = str(lyrics)
    normalized_text = lyrics.to_normalized_text()

    assert raw_text == "[00:03.00]c\n[00:02.00][00:01.00]x"
    assert normalized_text == "[00:01.00]x\n[00:02.00]x\n[00:03.00]c"
|
||||||
|
|
||||||
|
|
||||||
|
def test_str_preserves_plain_text_lines_without_injecting_time_tags() -> None:
    """str() emits untagged lines untouched; to_text() tags them in place."""
    lyrics = LRCData("plain line\n[ar:Artist]\nother line")

    raw_text = str(lyrics)
    tagged_text = lyrics.to_text()

    assert raw_text == "plain line\n[ar:Artist]\nother line"
    assert tagged_text == "[00:00.00]plain line\n[ar:Artist]\n[00:00.00]other line"
|
||||||
|
|
||||||
|
|
||||||
def test_word_sync_line_with_empty_tail_keeps_word_tag_only_when_enabled() -> None:
|
def test_word_sync_line_with_empty_tail_keeps_word_tag_only_when_enabled() -> None:
|
||||||
data = LRCData("[00:01.00]<00:02.00>")
|
data = LRCData("[00:01.00]<00:02.00>")
|
||||||
|
|
||||||
@@ -295,10 +406,10 @@ def test_word_sync_line_with_empty_tail_keeps_word_tag_only_when_enabled() -> No
|
|||||||
assert data.to_text(include_word_sync=True) == "[00:01.00]<00:02.00>"
|
assert data.to_text(include_word_sync=True) == "[00:01.00]<00:02.00>"
|
||||||
|
|
||||||
|
|
||||||
def test_to_text_plain_true_matches_to_plain_output() -> None:
|
def test_to_plain_outputs_sorted_plain_text() -> None:
|
||||||
data = LRCData("[00:02.00]b\n[00:01.00]a")
|
data = LRCData("[00:02.00]b\n[00:01.00]a")
|
||||||
|
|
||||||
assert data.to_text(plain=True) == data.to_plain()
|
assert data.to_plain() == "a\nb"
|
||||||
|
|
||||||
|
|
||||||
def test_duplicate_doc_tag_key_last_value_wins_but_lines_are_kept() -> None:
|
def test_duplicate_doc_tag_key_last_value_wins_but_lines_are_kept() -> None:
|
||||||
|
|||||||
Reference in New Issue
Block a user