fix: --plain now sorts lyrics to handle multi-tag lrc correctly

2026-04-02 09:51:08 +02:00
parent 7ebf51b78d
commit b5038fac80
2 changed files with 53 additions and 21 deletions
@@ -16,7 +16,7 @@ from .models import CacheStatus
 _RAW_TAG_RE = re.compile(r"\[(\d{2,}):(\d{2})(?:[.:](\d{1,3}))?\]")
 # Standard format after normalization: [mm:ss.cc]
-_STD_TAG_RE = re.compile(r"\[\d{2,}:\d{2}\.\d{2}\]")
+# _STD_TAG_RE = re.compile(r"\[\d{2,}:\d{2}\.\d{2}\]")
 # Standard format with capture groups
 _STD_TAG_CAPTURE_RE = re.compile(r"\[(\d{2,}):(\d{2})\.(\d{2})\]")
@@ -43,6 +43,21 @@ def _remove_pattern(text: str, pattern: re.Pattern) -> str:
    return pattern.sub("", text).strip()
 def _raw_tag_to_ms(mm: str, ss: str, frac: Optional[str]) -> int:
    """Convert parsed time tag components to total milliseconds."""
    if frac is None:
        ms = 0
    else:
        n = len(frac)
        if n == 1:
            ms = int(frac) * 100
        elif n == 2:
            ms = int(frac) * 10
        else:
            ms = int(frac)
    return (int(mm) * 60 + int(ss)) * 1000 + ms
 def _raw_tag_to_cs(mm: str, ss: str, frac: Optional[str]) -> str:
    """Convert parsed time tag components to standard [mm:ss.cc] string."""
    if frac is None:
@@ -225,40 +240,36 @@ class LRCData:
                _remove_pattern(line, _LINE_START_TAGS_RE) for line in self._lines
            ).strip("\n")
-        lines = []
+        tagged_lines = []
        for line in self._lines:
            pos = 0
-            cnt = 0
+            tag_ms = []
            plain_line = ""
            while True:
                # Only match strictly repeated standard time tags at the start of the line
                # Lines without any time tags are ignored.
                # Lyric lines are considered already stripped of whitespaces, so no strips here.
-                m = _STD_TAG_RE.match(line, pos)
+                m = _STD_TAG_CAPTURE_RE.match(line, pos)
                if not m:
-                    plain_line += line[pos:]
+                    lyric = line[pos:]
                    for tag in tag_ms:
                        tagged_lines.append((tag, lyric))
                    break
                tag_ms.append(_raw_tag_to_ms(m.group(1), m.group(2), m.group(3)))
                pos = m.end()
-                cnt += 1
+
-            # Also avoid dulplicating blank lines
+        sorted_lines = [lyric for _, lyric in sorted(tagged_lines, key=lambda x: x[0])]
            if deduplicate or not plain_line:
                if cnt > 0:
                    lines.append(plain_line)
            else:
                for _ in range(cnt):
                    lines.append(plain_line)
        if deduplicate:
            # Remove consecutive duplicates
            deduped_lines = []
            prev_line = None
-            for line in lines:
+            for line in sorted_lines:
                if line != prev_line:
                    deduped_lines.append(line)
                prev_line = line
-            lines = deduped_lines
+            sorted_lines = deduped_lines
-        return "\n".join(lines).strip()
+        return "\n".join(sorted_lines).strip()
    def print_lyrics(
        self,
@@ -1,6 +1,6 @@
 from __future__ import annotations
-from lrx_cli.lrc import LRCData
+from lrx_cli.lrc import LRCData, _raw_tag_to_ms
 from lrx_cli.models import CacheStatus
@@ -8,6 +8,13 @@ def _normalize(text: str) -> str:
    return str(LRCData(text))
 def test_raw_tag_to_ms_parses_common_fraction_formats() -> None:
    """Check millisecond conversion for absent, 1-, 2- and 3-digit fractions."""
    cases = [
        (("00", "00", None), 0),
        (("00", "01", "2"), 1200),
        (("00", "01", "23"), 1230),
        (("00", "01", "234"), 1234),
    ]
    for tag_parts, expected_ms in cases:
        assert _raw_tag_to_ms(*tag_parts) == expected_ms
 def test_normalize_tags_supports_all_raw_time_formats() -> None:
    raw = "\n".join(
        [
@@ -143,7 +150,7 @@ def test_normalize_unsynced_covers_documented_blank_and_tag_rules() -> None:
 def test_to_plain_duplicates_lines_by_leading_repeated_timestamps() -> None:
    text = "\n".join(
        [
-            "[00:01.00][00:02.00]hello",
+            "[00:02.00][00:01.00]hello",
            "[00:03.00]world",
            "no-tag-line",
            "[00:00.00]zero-only",
@@ -153,8 +160,22 @@ def test_to_plain_duplicates_lines_by_leading_repeated_timestamps() -> None:
    plain = LRCData(text).to_plain()
    # In synced mode, lines with standard tags are kept (including [00:00.00]),
-    # while lines without leading standard tags are ignored.
+    # lines without leading standard tags are ignored, and output is sorted by tag timestamp.
-    assert plain == "\n".join(["hello", "hello", "world", "zero-only"])
+    assert plain == "\n".join(["zero-only", "hello", "hello", "world"])
 def test_to_plain_sorts_lines_by_timestamp_across_lines() -> None:
    """Lines supplied out of order are emitted ordered by their time tags."""
    source_lines = ["[00:05.00]late", "[00:01.00]early", "[00:03.00]middle"]
    expected = "\n".join(["early", "middle", "late"])

    result = LRCData("\n".join(source_lines)).to_plain()

    assert result == expected
 def test_to_plain_deduplicate_collapses_only_consecutive_equals() -> None: