fix: --plain now sorts lyrics to handle multi-tag lrc correctly
This commit is contained in:
+28
-17
@@ -16,7 +16,7 @@ from .models import CacheStatus
|
|||||||
_RAW_TAG_RE = re.compile(r"\[(\d{2,}):(\d{2})(?:[.:](\d{1,3}))?\]")
|
_RAW_TAG_RE = re.compile(r"\[(\d{2,}):(\d{2})(?:[.:](\d{1,3}))?\]")
|
||||||
|
|
||||||
# Standard format after normalization: [mm:ss.cc]
|
# Standard format after normalization: [mm:ss.cc]
|
||||||
_STD_TAG_RE = re.compile(r"\[\d{2,}:\d{2}\.\d{2}\]")
|
# _STD_TAG_RE = re.compile(r"\[\d{2,}:\d{2}\.\d{2}\]")
|
||||||
|
|
||||||
# Standard format with capture groups
|
# Standard format with capture groups
|
||||||
_STD_TAG_CAPTURE_RE = re.compile(r"\[(\d{2,}):(\d{2})\.(\d{2})\]")
|
_STD_TAG_CAPTURE_RE = re.compile(r"\[(\d{2,}):(\d{2})\.(\d{2})\]")
|
||||||
@@ -43,6 +43,21 @@ def _remove_pattern(text: str, pattern: re.Pattern) -> str:
|
|||||||
return pattern.sub("", text).strip()
|
return pattern.sub("", text).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _raw_tag_to_ms(mm: str, ss: str, frac: Optional[str]) -> int:
    """Convert parsed time tag components to total milliseconds.

    Args:
        mm: Minutes field of the tag, as a decimal digit string.
        ss: Seconds field of the tag, as a decimal digit string.
        frac: Fractional-seconds digits, or ``None`` when the tag has no
            fractional part. One digit is read as tenths, two as hundredths
            and three as milliseconds; digits beyond the third are dropped.

    Returns:
        The tag's offset in whole milliseconds.
    """
    # Right-pad to three digits so "2" -> 200 and "23" -> 230, and truncate
    # anything finer than a millisecond rather than reading it as a larger
    # millisecond count (e.g. "2345" must be 234 ms, not 2345 ms).
    ms = int(frac[:3].ljust(3, "0")) if frac else 0
    return (int(mm) * 60 + int(ss)) * 1000 + ms
|
||||||
|
|
||||||
|
|
||||||
def _raw_tag_to_cs(mm: str, ss: str, frac: Optional[str]) -> str:
|
def _raw_tag_to_cs(mm: str, ss: str, frac: Optional[str]) -> str:
|
||||||
"""Convert parsed time tag components to standard [mm:ss.cc] string."""
|
"""Convert parsed time tag components to standard [mm:ss.cc] string."""
|
||||||
if frac is None:
|
if frac is None:
|
||||||
@@ -225,40 +240,36 @@ class LRCData:
|
|||||||
_remove_pattern(line, _LINE_START_TAGS_RE) for line in self._lines
|
_remove_pattern(line, _LINE_START_TAGS_RE) for line in self._lines
|
||||||
).strip("\n")
|
).strip("\n")
|
||||||
|
|
||||||
lines = []
|
tagged_lines = []
|
||||||
for line in self._lines:
|
for line in self._lines:
|
||||||
pos = 0
|
pos = 0
|
||||||
cnt = 0
|
tag_ms = []
|
||||||
plain_line = ""
|
|
||||||
while True:
|
while True:
|
||||||
# Only match strictly repeated standard time tags at the start of the line
|
# Only match strictly repeated standard time tags at the start of the line
|
||||||
# Lines without any time tags are ignored.
|
# Lines without any time tags are ignored.
|
||||||
# Lyric lines are considered already stripped of whitespaces, so no strips here.
|
# Lyric lines are considered already stripped of whitespaces, so no strips here.
|
||||||
m = _STD_TAG_RE.match(line, pos)
|
m = _STD_TAG_CAPTURE_RE.match(line, pos)
|
||||||
if not m:
|
if not m:
|
||||||
plain_line += line[pos:]
|
lyric = line[pos:]
|
||||||
|
for tag in tag_ms:
|
||||||
|
tagged_lines.append((tag, lyric))
|
||||||
break
|
break
|
||||||
|
tag_ms.append(_raw_tag_to_ms(m.group(1), m.group(2), m.group(3)))
|
||||||
pos = m.end()
|
pos = m.end()
|
||||||
cnt += 1
|
|
||||||
# Also avoid duplicating blank lines
|
sorted_lines = [lyric for _, lyric in sorted(tagged_lines, key=lambda x: x[0])]
|
||||||
if deduplicate or not plain_line:
|
|
||||||
if cnt > 0:
|
|
||||||
lines.append(plain_line)
|
|
||||||
else:
|
|
||||||
for _ in range(cnt):
|
|
||||||
lines.append(plain_line)
|
|
||||||
|
|
||||||
if deduplicate:
|
if deduplicate:
|
||||||
# Remove consecutive duplicates
|
# Remove consecutive duplicates
|
||||||
deduped_lines = []
|
deduped_lines = []
|
||||||
prev_line = None
|
prev_line = None
|
||||||
for line in lines:
|
for line in sorted_lines:
|
||||||
if line != prev_line:
|
if line != prev_line:
|
||||||
deduped_lines.append(line)
|
deduped_lines.append(line)
|
||||||
prev_line = line
|
prev_line = line
|
||||||
lines = deduped_lines
|
sorted_lines = deduped_lines
|
||||||
|
|
||||||
return "\n".join(lines).strip()
|
return "\n".join(sorted_lines).strip()
|
||||||
|
|
||||||
def print_lyrics(
|
def print_lyrics(
|
||||||
self,
|
self,
|
||||||
|
|||||||
+25
-4
@@ -1,6 +1,6 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from lrx_cli.lrc import LRCData
|
from lrx_cli.lrc import LRCData, _raw_tag_to_ms
|
||||||
from lrx_cli.models import CacheStatus
|
from lrx_cli.models import CacheStatus
|
||||||
|
|
||||||
|
|
||||||
@@ -8,6 +8,13 @@ def _normalize(text: str) -> str:
|
|||||||
return str(LRCData(text))
|
return str(LRCData(text))
|
||||||
|
|
||||||
|
|
||||||
|
def test_raw_tag_to_ms_parses_common_fraction_formats() -> None:
    """Check millisecond conversion for absent, 1-, 2- and 3-digit fractions."""
    expectations = [
        (("00", "00", None), 0),
        (("00", "01", "2"), 1200),
        (("00", "01", "23"), 1230),
        (("00", "01", "234"), 1234),
    ]
    for components, expected_ms in expectations:
        assert _raw_tag_to_ms(*components) == expected_ms
|
||||||
|
|
||||||
|
|
||||||
def test_normalize_tags_supports_all_raw_time_formats() -> None:
|
def test_normalize_tags_supports_all_raw_time_formats() -> None:
|
||||||
raw = "\n".join(
|
raw = "\n".join(
|
||||||
[
|
[
|
||||||
@@ -143,7 +150,7 @@ def test_normalize_unsynced_covers_documented_blank_and_tag_rules() -> None:
|
|||||||
def test_to_plain_duplicates_lines_by_leading_repeated_timestamps() -> None:
|
def test_to_plain_duplicates_lines_by_leading_repeated_timestamps() -> None:
|
||||||
text = "\n".join(
|
text = "\n".join(
|
||||||
[
|
[
|
||||||
"[00:01.00][00:02.00]hello",
|
"[00:02.00][00:01.00]hello",
|
||||||
"[00:03.00]world",
|
"[00:03.00]world",
|
||||||
"no-tag-line",
|
"no-tag-line",
|
||||||
"[00:00.00]zero-only",
|
"[00:00.00]zero-only",
|
||||||
@@ -153,8 +160,22 @@ def test_to_plain_duplicates_lines_by_leading_repeated_timestamps() -> None:
|
|||||||
plain = LRCData(text).to_plain()
|
plain = LRCData(text).to_plain()
|
||||||
|
|
||||||
# In synced mode, lines with standard tags are kept (including [00:00.00]),
|
# In synced mode, lines with standard tags are kept (including [00:00.00]),
|
||||||
# while lines without leading standard tags are ignored.
|
# lines without leading standard tags are ignored, and output is sorted by tag timestamp.
|
||||||
assert plain == "\n".join(["hello", "hello", "world", "zero-only"])
|
assert plain == "\n".join(["zero-only", "hello", "hello", "world"])
|
||||||
|
|
||||||
|
|
||||||
|
def test_to_plain_sorts_lines_by_timestamp_across_lines() -> None:
    """Lines given out of chronological order come back ordered by timestamp."""
    shuffled = ["[00:05.00]late", "[00:01.00]early", "[00:03.00]middle"]
    expected = "\n".join(["early", "middle", "late"])

    assert LRCData("\n".join(shuffled)).to_plain() == expected
|
||||||
|
|
||||||
|
|
||||||
def test_to_plain_deduplicate_collapses_only_consecutive_equals() -> None:
|
def test_to_plain_deduplicate_collapses_only_consecutive_equals() -> None:
|
||||||
|
|||||||
Reference in New Issue
Block a user