diff --git a/pyproject.toml b/pyproject.toml index 95faec2..0ceff0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "lrx-cli" -version = "0.6.2" +version = "0.6.3" description = "Fetch line-synced lyrics for your music player." readme = "README.md" requires-python = ">=3.13" diff --git a/src/lrx_cli/cli.py b/src/lrx_cli/cli.py index 26b1014..555ff60 100644 --- a/src/lrx_cli/cli.py +++ b/src/lrx_cli/cli.py @@ -99,7 +99,17 @@ def fetch( plain: Annotated[ bool, cyclopts.Parameter( - name="--plain", negative="", help="Output only the raw lyrics without tags." + name="--plain", + negative="", + help="Output only plain lyrics without tags (highest priority over --normalize).", + ), + ] = False, + normalize: Annotated[ + bool, + cyclopts.Parameter( + name="--normalize", + negative="", + help="Output normalized LRC (ignored when --plain is also set).", ), ] = False, ): @@ -123,7 +133,12 @@ def fetch( logger.error("No lyrics found.") sys.exit(1) - print(result.lyrics.to_text(plain=plain)) + if plain: + print(result.lyrics.to_plain()) + elif normalize: + print(result.lyrics.to_normalized_text()) + else: + print(result.lyrics.to_text()) # search @@ -179,7 +194,17 @@ def search( plain: Annotated[ bool, cyclopts.Parameter( - name="--plain", negative="", help="Output only the raw lyrics without tags." + name="--plain", + negative="", + help="Output only plain lyrics without tags (highest priority over --normalize).", + ), + ] = False, + normalize: Annotated[ + bool, + cyclopts.Parameter( + name="--normalize", + negative="", + help="Output normalized LRC (ignored when --plain is also set).", ), ] = False, ): @@ -214,7 +239,12 @@ def search( logger.error("No lyrics found.") sys.exit(1) - print(result.lyrics.to_text(plain=plain)) + if plain: + print(result.lyrics.to_plain()) + elif normalize: + print(result.lyrics.to_normalized_text()) + else: + print(result.lyrics.to_text()) # export @@ -253,7 +283,17 @@ def export( plain: Annotated[ bool, cyclopts.Parameter( - name="--plain", negative="", help="Export only the raw lyrics without tags." + name="--plain", + negative="", + help="Export only plain lyrics (.txt, highest priority over --normalize).", + ), + ] = False, + normalize: Annotated[ + bool, + cyclopts.Parameter( + name="--normalize", + negative="", + help="Export normalized LRC output (ignored when --plain is also set).", ), ] = False, ): @@ -307,8 +347,10 @@ def export( with open(output, "w", encoding="utf-8") as f: if plain: f.write(result.lyrics.to_plain()) + elif normalize: + f.write(result.lyrics.to_normalized_text()) else: - f.write(str(result.lyrics)) + f.write(result.lyrics.to_text()) logger.info(f"Exported lyrics to {output}") except Exception as e: logger.error(f"Failed to write file: {e}") diff --git a/src/lrx_cli/lrc.py b/src/lrx_cli/lrc.py index eca80ba..f263214 100644 --- a/src/lrx_cli/lrc.py +++ b/src/lrx_cli/lrc.py @@ -233,6 +233,14 @@ def _is_single_doc_tag_line(line: str) -> Optional[tuple[str, str]]: return key, value +def _parse_offset_value(value: str) -> Optional[int]: + """Parse doc offset value in milliseconds, returning None for invalid values.""" + try: + return int(value.strip()) + except ValueError: + return None + + class LRCData: _lines: list[BaseLine] _doc_tags: dict[str, str] @@ -265,7 +273,7 @@ class LRCData: self._lines = parsed def __str__(self) -> str: - return self.to_text(plain=False, include_word_sync=False) + return self._serialize_lines(self._lines, include_word_sync=True) def __repr__(self) -> str: return f"LRCData(doc_tags={self._doc_tags!r}, lines={self._lines!r})" @@ -293,7 +301,7 @@ class LRCData: else CacheStatus.SUCCESS_UNSYNCED ) - def normalize_unsynced(self): + def normalize_unsynced(self) -> "LRCData": """Convert lyrics into unsynced LRC form with [00:00.00] tags. - Leading blank lyric lines are skipped. @@ -329,6 +337,59 @@ class LRCData: ret._doc_tags = dict(self._doc_tags) return ret + def normalize(self) -> "LRCData": + """Normalize LRC for decode/export oriented output. + + Rules: + - Move all doc tags to the beginning, preserving line order and duplicates. + - Keep doc tags unchanged except removing all offset tags. + - Remove word-sync tags. + - Convert untagged non-empty lyric lines to [00:00.00] lyrics. + - Drop empty lyric lines. + - Expand lyric lines with multiple time tags into one line per tag. + - Apply offset (ms) to lyric timestamps and sort by timestamp. + """ + out_doc_tags: list[DocTagLine] = [] + lyric_entries: list[tuple[int, str]] = [] + offset_ms = 0 + + # Resolve offset first so it applies to all lyric lines, independent of tag position. + for line in self._lines: + if isinstance(line, DocTagLine) and line.key.strip().lower() == "offset": + parsed_offset = _parse_offset_value(line.value) + if parsed_offset is not None: + offset_ms = parsed_offset + + for line in self._lines: + if isinstance(line, DocTagLine): + if line.key.strip().lower() == "offset": + continue + out_doc_tags.append(DocTagLine(key=line.key, value=line.value)) + continue + + assert isinstance(line, LyricLine) + + lyric_text = line.text + if not lyric_text.strip(): + continue + + line_times = line.line_times_ms if line.line_times_ms else [0] + for time_ms in line_times: + shifted = max(0, time_ms + offset_ms) + lyric_entries.append((shifted, lyric_text)) + + lyric_entries.sort(key=lambda item: item[0]) + + out_lyrics: list[LyricLine] = [ + LyricLine(line_times_ms=[time_ms], words=[LrcWordSegment(text=text)]) + for time_ms, text in lyric_entries + ] + + ret = LRCData() + ret._lines = [*out_doc_tags, *out_lyrics] + ret._doc_tags = {line.key: line.value for line in out_doc_tags} + return ret + def to_plain( self, deduplicate: bool = False, @@ -366,23 +427,32 @@ class LRCData: return "\n".join(sorted_lines).strip() + @staticmethod + def _serialize_lines(lines: list[BaseLine], include_word_sync: bool) -> str: + return "\n".join( + line.to_text(include_word_sync=include_word_sync) for line in lines + ) + def to_text( self, - plain: bool = False, include_word_sync: bool = False, ) -> str: - """Serialize to LRC text or plain text. + """Serialize to non-normalized LRC text. - - plain=True returns to_plain(). - - include_word_sync controls rendering of per-word tags for word-sync lines. + - Unsynced lyrics are converted to [00:00.00]-tagged form. + - include_word_sync only controls rendering of per-word tags. + - This method does not apply normalize() rules. """ - if plain: - return self.to_plain(deduplicate=False) + res = self if self.is_synced() else self.normalize_unsynced() + return self._serialize_lines(res._lines, include_word_sync=include_word_sync) - lines: list[str] = [ - line.to_text(include_word_sync=include_word_sync) for line in self._lines - ] - return "\n".join(lines) + def to_normalized_text(self) -> str: + """Serialize using normalize() rules. + + Normalized output always strips word-sync tags. + """ + normalized = self.normalize() + return self._serialize_lines(normalized._lines, include_word_sync=False) def get_audio_path(audio_url: str, ensure_exists: bool = False) -> Optional[Path]: diff --git a/tests/test_lrc.py b/tests/test_lrc.py index 516b0ac..8350daf 100644 --- a/tests/test_lrc.py +++ b/tests/test_lrc.py @@ -149,6 +149,83 @@ def test_normalize_unsynced_result_is_always_unsynced() -> None: assert normalized.detect_sync_status() is CacheStatus.SUCCESS_UNSYNCED +def test_normalize_moves_doc_tags_to_top_and_removes_offset_tag() -> None: + text = "\n".join( + [ + "[00:02.00]b", + "[ar:Artist]", + "[offset:500]", + "[00:01.00]a", + "[ti:Song]", + ] + ) + + normalized = LRCData(text).to_normalized_text() + + assert normalized == "\n".join( + [ + "[ar:Artist]", + "[ti:Song]", + "[00:01.50]a", + "[00:02.50]b", + ] + ) + + +def test_normalize_expands_multi_time_tags_and_sorts_lyrics() -> None: + text = "\n".join( + [ + "[00:03.00]c", + "[00:02.00][00:01.00]x", + ] + ) + + normalized = LRCData(text).to_normalized_text() + + assert normalized == "\n".join(["[00:01.00]x", "[00:02.00]x", "[00:03.00]c"]) + + +def test_normalize_converts_unsynced_lines_and_removes_word_sync_tags() -> None: + text = "\n".join( + [ + "plain", + "<00:01.00>he <00:01.50>llo", + "[00:02.00]<00:02.20>world", + "", + ] + ) + + normalized = LRCData(text).to_normalized_text() + + assert normalized == "\n".join( + [ + "[00:00.00]plain", + "[00:00.00]he llo", + "[00:02.00]world", + ] + ) + + +def test_to_normalized_text_is_separate_from_plain() -> None: + data = LRCData("[offset:500]\n[00:02.00]b\n[00:01.00]a") + + assert data.to_plain() == "a\nb" + assert data.to_normalized_text() == "[00:01.50]a\n[00:02.50]b" + + +def test_to_text_default_forces_unsynced_tagging() -> None: + data = LRCData("line\nother") + + assert data.to_text() == "[00:00.00]line\n[00:00.00]other" + + +def test_str_keeps_raw_unsynced_while_to_text_adds_unsynced_tags() -> None: + data = LRCData("line\nother") + + assert str(data) == "line\nother" + assert data.to_text() == "[00:00.00]line\n[00:00.00]other" + + def test_to_plain_duplicates_lines_for_multi_line_times() -> None: text = "\n".join( [ @@ -288,6 +365,40 @@ def test_word_sync_markup_only_changes_output_when_enabled() -> None: assert b.to_text(include_word_sync=True) == "[00:01.00]lyric" +def test_str_preserves_word_sync_markup() -> None: + data = LRCData("[00:01.00]<00:00.50>lyric") + + assert str(data) == "[00:01.00]<00:00.50>lyric" + assert data.to_text(include_word_sync=False) == "[00:01.00]lyric" + + +def test_str_preserves_offset_tag_and_does_not_apply_it() -> None: + data = LRCData("[offset:500]\n[00:01.00]a") + + assert str(data) == "[offset:500]\n[00:01.00]a" + assert data.to_normalized_text() == "[00:01.50]a" + + +def test_str_preserves_doc_tag_order_and_duplicates_exactly() -> None: + data = LRCData("[ar:First]\n[ti:Song]\n[ar:Second]\n[00:01.00]line") + + assert str(data) == "[ar:First]\n[ti:Song]\n[ar:Second]\n[00:01.00]line" + + +def test_str_does_not_expand_or_sort_multi_time_lines() -> None: + data = LRCData("[00:03.00]c\n[00:02.00][00:01.00]x") + + assert str(data) == "[00:03.00]c\n[00:02.00][00:01.00]x" + assert data.to_normalized_text() == "[00:01.00]x\n[00:02.00]x\n[00:03.00]c" + + +def test_str_preserves_plain_text_lines_without_injecting_time_tags() -> None: + data = LRCData("plain line\n[ar:Artist]\nother line") + + assert str(data) == "plain line\n[ar:Artist]\nother line" + assert data.to_text() == "[00:00.00]plain line\n[ar:Artist]\n[00:00.00]other line" + + def test_word_sync_line_with_empty_tail_keeps_word_tag_only_when_enabled() -> None: data = LRCData("[00:01.00]<00:02.00>") @@ -295,10 +406,10 @@ def test_word_sync_line_with_empty_tail_keeps_word_tag_only_when_enabled() -> No assert data.to_text(include_word_sync=True) == "[00:01.00]<00:02.00>" -def test_to_text_plain_true_matches_to_plain_output() -> None: +def test_to_plain_outputs_sorted_plain_text() -> None: data = LRCData("[00:02.00]b\n[00:01.00]a") - assert data.to_text(plain=True) == data.to_plain() + assert data.to_plain() == "a\nb" def test_duplicate_doc_tag_key_last_value_wins_but_lines_are_kept() -> None: diff --git a/uv.lock b/uv.lock index 9761424..cbc4707 100644 --- a/uv.lock +++ b/uv.lock @@ -153,7 +153,7 @@ wheels = [ [[package]] name = "lrx-cli" -version = "0.6.2" +version = "0.6.3" source = { editable = "." } dependencies = [ { name = "cyclopts" },