fix: --plain now sorts lyrics to handle multi-tag lrc correctly

This commit is contained in:
2026-04-02 09:51:08 +02:00
parent 7ebf51b78d
commit b5038fac80
2 changed files with 53 additions and 21 deletions
+28 -17
View File
@@ -16,7 +16,7 @@ from .models import CacheStatus
_RAW_TAG_RE = re.compile(r"\[(\d{2,}):(\d{2})(?:[.:](\d{1,3}))?\]")
# Standard format after normalization: [mm:ss.cc]
_STD_TAG_RE = re.compile(r"\[\d{2,}:\d{2}\.\d{2}\]")
# _STD_TAG_RE = re.compile(r"\[\d{2,}:\d{2}\.\d{2}\]")
# Standard format with capture groups
_STD_TAG_CAPTURE_RE = re.compile(r"\[(\d{2,}):(\d{2})\.(\d{2})\]")
@@ -43,6 +43,21 @@ def _remove_pattern(text: str, pattern: re.Pattern) -> str:
return pattern.sub("", text).strip()
def _raw_tag_to_ms(mm: str, ss: str, frac: Optional[str]) -> int:
"""Convert parsed time tag components to total milliseconds."""
if frac is None:
ms = 0
else:
n = len(frac)
if n == 1:
ms = int(frac) * 100
elif n == 2:
ms = int(frac) * 10
else:
ms = int(frac)
return (int(mm) * 60 + int(ss)) * 1000 + ms
def _raw_tag_to_cs(mm: str, ss: str, frac: Optional[str]) -> str:
"""Convert parsed time tag components to standard [mm:ss.cc] string."""
if frac is None:
@@ -225,40 +240,36 @@ class LRCData:
_remove_pattern(line, _LINE_START_TAGS_RE) for line in self._lines
).strip("\n")
lines = []
tagged_lines = []
for line in self._lines:
pos = 0
cnt = 0
plain_line = ""
tag_ms = []
while True:
# Only match strictly repeated standard time tags at the start of the line
# Lines without any time tags are ignored.
# Lyric lines are assumed to be already stripped of whitespace, so no stripping is done here.
m = _STD_TAG_RE.match(line, pos)
m = _STD_TAG_CAPTURE_RE.match(line, pos)
if not m:
plain_line += line[pos:]
lyric = line[pos:]
for tag in tag_ms:
tagged_lines.append((tag, lyric))
break
tag_ms.append(_raw_tag_to_ms(m.group(1), m.group(2), m.group(3)))
pos = m.end()
cnt += 1
# Also avoid duplicating blank lines
if deduplicate or not plain_line:
if cnt > 0:
lines.append(plain_line)
else:
for _ in range(cnt):
lines.append(plain_line)
sorted_lines = [lyric for _, lyric in sorted(tagged_lines, key=lambda x: x[0])]
if deduplicate:
# Remove consecutive duplicates
deduped_lines = []
prev_line = None
for line in lines:
for line in sorted_lines:
if line != prev_line:
deduped_lines.append(line)
prev_line = line
lines = deduped_lines
sorted_lines = deduped_lines
return "\n".join(lines).strip()
return "\n".join(sorted_lines).strip()
def print_lyrics(
self,