diff --git a/lrx_cli/lrc.py b/lrx_cli/lrc.py
index b62e34b..489191e 100644
--- a/lrx_cli/lrc.py
+++ b/lrx_cli/lrc.py
@@ -16,7 +16,4 @@ from .models import CacheStatus
 _RAW_TAG_RE = re.compile(r"\[(\d{2,}):(\d{2})(?:[.:](\d{1,3}))?\]")
 
-# Standard format after normalization: [mm:ss.cc]
-_STD_TAG_RE = re.compile(r"\[\d{2,}:\d{2}\.\d{2}\]")
-
-# Standard format with capture groups
+# Standard format after normalization: [mm:ss.cc], with capture groups
 _STD_TAG_CAPTURE_RE = re.compile(r"\[(\d{2,}):(\d{2})\.(\d{2})\]")
@@ -43,6 +40,17 @@ def _remove_pattern(text: str, pattern: re.Pattern) -> str:
     return pattern.sub("", text).strip()
 
 
+def _raw_tag_to_ms(mm: str, ss: str, frac: Optional[str]) -> int:
+    """Convert parsed time tag components to total milliseconds.
+
+    ``frac`` is the optional fractional-second digit string: one digit is
+    tenths, two digits are hundredths, three digits are milliseconds.
+    """
+    # Right-pad to three digits so "2" -> 200 ms and "23" -> 230 ms.
+    ms = int(frac.ljust(3, "0")[:3]) if frac else 0
+    return (int(mm) * 60 + int(ss)) * 1000 + ms
+
+
 def _raw_tag_to_cs(mm: str, ss: str, frac: Optional[str]) -> str:
     """Convert parsed time tag components to standard [mm:ss.cc] string."""
     if frac is None:
@@ -225,40 +233,37 @@ class LRCData:
                 _remove_pattern(line, _LINE_START_TAGS_RE) for line in self._lines
             ).strip("\n")
 
-        lines = []
+        tagged_lines = []
         for line in self._lines:
             pos = 0
-            cnt = 0
-            plain_line = ""
+            tag_ms = []
             while True:
                 # Only match strictly repeated standard time tags at the start of the line
                 # Lines without any time tags are ignored.
                 # Lyric lines are considered already stripped of whitespaces, so no strips here.
-                m = _STD_TAG_RE.match(line, pos)
+                m = _STD_TAG_CAPTURE_RE.match(line, pos)
                 if not m:
-                    plain_line += line[pos:]
+                    lyric = line[pos:]
+                    for tag in tag_ms:
+                        tagged_lines.append((tag, lyric))
                     break
+                tag_ms.append(_raw_tag_to_ms(m.group(1), m.group(2), m.group(3)))
                 pos = m.end()
-                cnt += 1
-            # Also avoid dulplicating blank lines
-            if deduplicate or not plain_line:
-                if cnt > 0:
-                    lines.append(plain_line)
-            else:
-                for _ in range(cnt):
-                    lines.append(plain_line)
+
+        # Stable sort on the timestamp only: equal timestamps keep their file order.
+        sorted_lines = [lyric for _, lyric in sorted(tagged_lines, key=lambda x: x[0])]
 
         if deduplicate:
             # Remove consecutive duplicates
             deduped_lines = []
             prev_line = None
-            for line in lines:
+            for line in sorted_lines:
                 if line != prev_line:
                     deduped_lines.append(line)
                     prev_line = line
-            lines = deduped_lines
+            sorted_lines = deduped_lines
 
-        return "\n".join(lines).strip()
+        return "\n".join(sorted_lines).strip()
 
     def print_lyrics(
         self,
diff --git a/tests/test_lrc.py b/tests/test_lrc.py
index 67c3299..a8d42a6 100644
--- a/tests/test_lrc.py
+++ b/tests/test_lrc.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from lrx_cli.lrc import LRCData
+from lrx_cli.lrc import LRCData, _raw_tag_to_ms
 from lrx_cli.models import CacheStatus
 
 
@@ -8,6 +8,13 @@ def _normalize(text: str) -> str:
     return str(LRCData(text))
 
 
+def test_raw_tag_to_ms_parses_common_fraction_formats() -> None:
+    assert _raw_tag_to_ms("00", "00", None) == 0
+    assert _raw_tag_to_ms("00", "01", "2") == 1200
+    assert _raw_tag_to_ms("00", "01", "23") == 1230
+    assert _raw_tag_to_ms("00", "01", "234") == 1234
+
+
 def test_normalize_tags_supports_all_raw_time_formats() -> None:
     raw = "\n".join(
         [
@@ -143,7 +150,7 @@ def test_normalize_unsynced_covers_documented_blank_and_tag_rules() -> None:
 def test_to_plain_duplicates_lines_by_leading_repeated_timestamps() -> None:
     text = "\n".join(
         [
-            "[00:01.00][00:02.00]hello",
+            "[00:02.00][00:01.00]hello",
             "[00:03.00]world",
             "no-tag-line",
             "[00:00.00]zero-only",
@@ -153,8 +160,22 @@ def test_to_plain_duplicates_lines_by_leading_repeated_timestamps() -> None:
     plain = LRCData(text).to_plain()
 
     # In synced mode, lines with standard tags are kept (including [00:00.00]),
-    # while lines without leading standard tags are ignored.
-    assert plain == "\n".join(["hello", "hello", "world", "zero-only"])
+    # lines without leading standard tags are ignored, and output is sorted by tag timestamp.
+    assert plain == "\n".join(["zero-only", "hello", "hello", "world"])
+
+
+def test_to_plain_sorts_lines_by_timestamp_across_lines() -> None:
+    text = "\n".join(
+        [
+            "[00:05.00]late",
+            "[00:01.00]early",
+            "[00:03.00]middle",
+        ]
+    )
+
+    plain = LRCData(text).to_plain()
+
+    assert plain == "\n".join(["early", "middle", "late"])
 
 
 def test_to_plain_deduplicate_collapses_only_consecutive_equals() -> None: