fix: --plain now sorts lyrics to handle multi-tag lrc correctly

This commit is contained in:
2026-04-02 09:51:08 +02:00
parent 7ebf51b78d
commit b5038fac80
2 changed files with 53 additions and 21 deletions
+28 -17
View File
@@ -16,7 +16,7 @@ from .models import CacheStatus
_RAW_TAG_RE = re.compile(r"\[(\d{2,}):(\d{2})(?:[.:](\d{1,3}))?\]")
# Standard format after normalization: [mm:ss.cc]
_STD_TAG_RE = re.compile(r"\[\d{2,}:\d{2}\.\d{2}\]")
# _STD_TAG_RE = re.compile(r"\[\d{2,}:\d{2}\.\d{2}\]")
# Standard format with capture groups
_STD_TAG_CAPTURE_RE = re.compile(r"\[(\d{2,}):(\d{2})\.(\d{2})\]")
@@ -43,6 +43,21 @@ def _remove_pattern(text: str, pattern: re.Pattern) -> str:
return pattern.sub("", text).strip()
def _raw_tag_to_ms(mm: str, ss: str, frac: Optional[str]) -> int:
    """Convert parsed time tag components to total milliseconds.

    Args:
        mm: Minutes component of the tag, as a decimal digit string.
        ss: Seconds component of the tag, as a decimal digit string.
        frac: Fractional-second digits, or ``None`` when the tag has no
            fraction. One digit is read as tenths, two as hundredths and
            three as milliseconds; digits beyond the third are dropped.

    Returns:
        The tag's position in whole milliseconds.

    Raises:
        ValueError: If a component is not a valid decimal integer string.
    """
    if frac:
        # Normalise the fraction to exactly three digits: truncate anything
        # finer than a millisecond, then right-pad so "2" -> "200" and
        # "23" -> "230". This keeps the fraction below one second.
        ms = int(frac[:3].ljust(3, "0"))
    else:
        # No fraction (None or empty) means the tag lands on a whole second.
        ms = 0
    return (int(mm) * 60 + int(ss)) * 1000 + ms
def _raw_tag_to_cs(mm: str, ss: str, frac: Optional[str]) -> str:
"""Convert parsed time tag components to standard [mm:ss.cc] string."""
if frac is None:
@@ -225,40 +240,36 @@ class LRCData:
_remove_pattern(line, _LINE_START_TAGS_RE) for line in self._lines
).strip("\n")
lines = []
tagged_lines = []
for line in self._lines:
pos = 0
cnt = 0
plain_line = ""
tag_ms = []
while True:
# Only match strictly repeated standard time tags at the start of the line
# Lines without any time tags are ignored.
# Lyric lines are assumed to be already stripped of whitespace, so no stripping is done here.
m = _STD_TAG_RE.match(line, pos)
m = _STD_TAG_CAPTURE_RE.match(line, pos)
if not m:
plain_line += line[pos:]
lyric = line[pos:]
for tag in tag_ms:
tagged_lines.append((tag, lyric))
break
tag_ms.append(_raw_tag_to_ms(m.group(1), m.group(2), m.group(3)))
pos = m.end()
cnt += 1
# Also avoid duplicating blank lines
if deduplicate or not plain_line:
if cnt > 0:
lines.append(plain_line)
else:
for _ in range(cnt):
lines.append(plain_line)
sorted_lines = [lyric for _, lyric in sorted(tagged_lines, key=lambda x: x[0])]
if deduplicate:
# Remove consecutive duplicates
deduped_lines = []
prev_line = None
for line in lines:
for line in sorted_lines:
if line != prev_line:
deduped_lines.append(line)
prev_line = line
lines = deduped_lines
sorted_lines = deduped_lines
return "\n".join(lines).strip()
return "\n".join(sorted_lines).strip()
def print_lyrics(
self,
+25 -4
View File
@@ -1,6 +1,6 @@
from __future__ import annotations
from lrx_cli.lrc import LRCData
from lrx_cli.lrc import LRCData, _raw_tag_to_ms
from lrx_cli.models import CacheStatus
@@ -8,6 +8,13 @@ def _normalize(text: str) -> str:
return str(LRCData(text))
def test_raw_tag_to_ms_parses_common_fraction_formats() -> None:
    """Check millisecond conversion for absent, 1-, 2- and 3-digit fractions."""
    # Maps (mm, ss, frac) components to the expected total in milliseconds.
    expectations = {
        ("00", "00", None): 0,
        ("00", "01", "2"): 1200,
        ("00", "01", "23"): 1230,
        ("00", "01", "234"): 1234,
    }
    for (mm, ss, frac), expected_ms in expectations.items():
        assert _raw_tag_to_ms(mm, ss, frac) == expected_ms
def test_normalize_tags_supports_all_raw_time_formats() -> None:
raw = "\n".join(
[
@@ -143,7 +150,7 @@ def test_normalize_unsynced_covers_documented_blank_and_tag_rules() -> None:
def test_to_plain_duplicates_lines_by_leading_repeated_timestamps() -> None:
text = "\n".join(
[
"[00:01.00][00:02.00]hello",
"[00:02.00][00:01.00]hello",
"[00:03.00]world",
"no-tag-line",
"[00:00.00]zero-only",
@@ -153,8 +160,22 @@ def test_to_plain_duplicates_lines_by_leading_repeated_timestamps() -> None:
plain = LRCData(text).to_plain()
# In synced mode, lines with standard tags are kept (including [00:00.00]),
# while lines without leading standard tags are ignored.
assert plain == "\n".join(["hello", "hello", "world", "zero-only"])
# lines without leading standard tags are ignored, and output is sorted by tag timestamp.
assert plain == "\n".join(["zero-only", "hello", "hello", "world"])
def test_to_plain_sorts_lines_by_timestamp_across_lines() -> None:
    """Lines given out of chronological order come back sorted by their tag."""
    unordered = ("[00:05.00]late", "[00:01.00]early", "[00:03.00]middle")
    expected = ("early", "middle", "late")

    result = LRCData("\n".join(unordered)).to_plain()

    assert result == "\n".join(expected)
def test_to_plain_deduplicate_collapses_only_consecutive_equals() -> None: