feat: better lrc handling

This commit is contained in:
2026-04-01 17:51:28 +02:00
parent cd60d3042c
commit 0b830e176d
4 changed files with 317 additions and 31 deletions
+81 -29
View File
@@ -21,14 +21,26 @@ _STD_TAG_RE = re.compile(r"\[\d{2,}:\d{2}\.\d{2}\]")
# Standard format with capture groups # Standard format with capture groups
_STD_TAG_CAPTURE_RE = re.compile(r"\[(\d{2,}):(\d{2})\.(\d{2})\]") _STD_TAG_CAPTURE_RE = re.compile(r"\[(\d{2,}):(\d{2})\.(\d{2})\]")
# Matches a standard time tag at the start of a line
_LRC_LINE_RE = re.compile(r"^\[\d{2,}:\d{2}\.\d{2}\]", re.MULTILINE)
# [offset:+/-xxx] tag — value in milliseconds # [offset:+/-xxx] tag — value in milliseconds
_OFFSET_RE = re.compile(r"^\[offset:\s*([+-]?\d+)\]\s*$", re.MULTILINE | re.IGNORECASE) _OFFSET_RE = re.compile(r"^\[offset:\s*([+-]?\d+)\]\s*$", re.MULTILINE | re.IGNORECASE)
# Matches any number of tags at the start of a line # Any number of ID/Time tags at the start of a line
_LINE_START_TAGS_RE = re.compile(r"^(?:\[[^\]]*\])+") _LINE_START_TAGS_RE = re.compile(r"^(?:\[[^\]]*\])+", re.MULTILINE)
# Any number of standard time tags at the start of a line
_LINE_START_STD_TAGS_RE = re.compile(r"^(?:\[\d{2,}:\d{2}\.\d{2}\])+", re.MULTILINE)
# Word-level sync tags
# <mm:ss>, <mm:ss.c>, <mm:ss.cc>, <mm:ss:cc>, <xx,yy,zz>
_WORD_SYNC_TAG_RE = re.compile(r"<\d{2,}:\d{2}(?:[.:]\d{1,3})?>|<\d+,\d+,\d+>")
# QRC is a completely different matter. Since they still provide standard LRC APIs,
# it might be a good idea to leave this mess to the future :)
def _remove_pattern(text: str, pattern: re.Pattern) -> str:
    """Delete every match of ``pattern`` from ``text`` and trim the result.

    Args:
        text: The string to clean.
        pattern: Compiled regular expression whose matches are removed.

    Returns:
        ``text`` with all matches replaced by the empty string and with
        leading/trailing whitespace stripped.
    """
    without_matches = re.sub(pattern, "", text)
    return without_matches.strip()
def _raw_tag_to_cs(mm: str, ss: str, frac: Optional[str]) -> str: def _raw_tag_to_cs(mm: str, ss: str, frac: Optional[str]) -> str:
@@ -50,6 +62,14 @@ def _raw_tag_to_cs(mm: str, ss: str, frac: Optional[str]) -> str:
return f"[{mm}:{ss}.{cs:02d}]" return f"[{mm}:{ss}.{cs:02d}]"
def _sanitize_lyric_text(text: str) -> str:
    """Strip word-level sync tags from a lyric string.

    Every match of ``_WORD_SYNC_TAG_RE`` is removed, then surrounding
    whitespace is trimmed. The caller is expected to have dealt with the
    regular line-level time tags already.
    """
    without_word_tags = _WORD_SYNC_TAG_RE.sub("", text)
    return without_word_tags.strip()
def _reformat(text: str) -> str: def _reformat(text: str) -> str:
"""Parse each line and reformat to standard [mm:ss.cc]...content form. """Parse each line and reformat to standard [mm:ss.cc]...content form.
@@ -62,7 +82,7 @@ def _reformat(text: str) -> str:
pos = 0 pos = 0
tags: list[str] = [] tags: list[str] = []
while True: while True:
while pos < len(line) and line[pos] == " ": while pos < len(line) and line[pos].isspace():
pos += 1 pos += 1
m = _RAW_TAG_RE.match(line, pos) m = _RAW_TAG_RE.match(line, pos)
# Non-time tags are passed through as-is, except for leading/trailing whitespace which is stripped. # Non-time tags are passed through as-is, except for leading/trailing whitespace which is stripped.
@@ -72,9 +92,10 @@ def _reformat(text: str) -> str:
tags.append(_raw_tag_to_cs(m.group(1), m.group(2), m.group(3))) tags.append(_raw_tag_to_cs(m.group(1), m.group(2), m.group(3)))
pos = m.end() pos = m.end()
if tags: if tags:
# This could break lyric lines of some kind of word-synced LRC format, # This could break lyric lines of some kind of word-synced LRC format, e.g.
# [00:01.00]Lyric [00:02.00]line
# but such format were not planned to be supported in the first place, so… # but such format were not planned to be supported in the first place, so…
out.append("".join(tags) + line[pos:].lstrip()) out.append(_sanitize_lyric_text("".join(tags) + line[pos:]))
else: else:
out.append(line) out.append(line)
# Empty lines with no tags are also preserved # Empty lines with no tags are also preserved
@@ -117,7 +138,7 @@ def normalize_tags(text: str) -> str:
def is_synced(text: str) -> bool: def is_synced(text: str) -> bool:
"""Check whether text contains non-zero LRC time tags. """Check whether text contains non-zero LRC time tags.
Assumes text has been normalized by normalize_tags (standard [mm:ss.cc] format). Assumes text has been normalized by normalize (standard [mm:ss.cc] format).
""" """
tags = _STD_TAG_RE.findall(text) tags = _STD_TAG_RE.findall(text)
return bool(tags) and any(tag != "[00:00.00]" for tag in tags) return bool(tags) and any(tag != "[00:00.00]" for tag in tags)
@@ -126,7 +147,7 @@ def is_synced(text: str) -> bool:
def detect_sync_status(text: str) -> CacheStatus: def detect_sync_status(text: str) -> CacheStatus:
"""Determine whether lyrics contain meaningful LRC time tags. """Determine whether lyrics contain meaningful LRC time tags.
Assumes text has been normalized by normalize_tags. Assumes text has been normalized by normalize.
""" """
return ( return (
CacheStatus.SUCCESS_SYNCED if is_synced(text) else CacheStatus.SUCCESS_UNSYNCED CacheStatus.SUCCESS_SYNCED if is_synced(text) else CacheStatus.SUCCESS_UNSYNCED
@@ -136,19 +157,23 @@ def detect_sync_status(text: str) -> CacheStatus:
def normalize_unsynced(lyrics: str) -> str: def normalize_unsynced(lyrics: str) -> str:
"""Normalize unsynced lyrics so every line has a [00:00.00] tag. """Normalize unsynced lyrics so every line has a [00:00.00] tag.
Assumes lyrics have been normalized by normalize.
- Lines that already have time tags: replace with [00:00.00] - Lines that already have time tags: replace with [00:00.00]
- Lines without time tags: prepend [00:00.00] - Lines without leading tags: prepend [00:00.00]
- Blank lines are converted to [00:00.00] - Blank lines in middle are converted to [00:00.00]
""" """
out: list[str] = [] out: list[str] = []
first = True
for line in lyrics.splitlines(): for line in lyrics.splitlines():
stripped = line.strip() stripped = line.strip()
if not stripped: if not stripped and not first:
out.append("[00:00.00]") out.append("[00:00.00]")
continue continue
cleaned = _LRC_LINE_RE.sub("", stripped) elif not stripped:
while _LRC_LINE_RE.match(cleaned): # Skip leading blank lines
cleaned = _LRC_LINE_RE.sub("", cleaned) continue
first = False
cleaned = _remove_pattern(line, _LINE_START_STD_TAGS_RE)
out.append(f"[00:00.00]{cleaned}") out.append(f"[00:00.00]{cleaned}")
return "\n".join(out) return "\n".join(out)
@@ -183,25 +208,52 @@ def get_sidecar_path(
def to_plain( def to_plain(
text: str, text: str,
deduplicate: bool = False,
) -> str: ) -> str:
"""Convert lyrics to plain text with all tags stripped. """Convert lyrics to plain text with all tags stripped.
Assumes text has been normalized by normalize_tags. If deduplicate is True, only keep the first line of consecutive lines with the same lyric text (after stripping tags).
Otherwise, lines with multiple time tags will be duplicated as many times as the number of tags.
Assumes text has been normalized by normalize.
""" """
if not is_synced(text):
# If there are no meaningful time tags, just strip all tags and return
return _remove_pattern(text, _LINE_START_TAGS_RE)
lines = [] lines = []
first = True
for line in text.splitlines(): for line in text.splitlines():
cleaned = _LINE_START_TAGS_RE.sub("", line).strip() pos = 0
# Ignore the leading empty lines that is likely caused by tag lines cnt = 0
if not cleaned and not first: plain_line = ""
lines.append("") while True:
elif cleaned: # Only match strictly repeated standard time tags at the start of the line
lines.append(cleaned) # Lines without any time tags are ignored.
first = False # Lyric lines are considered already stripped of whitespaces, so no strips here.
# Remove trailing empty lines that are meaningless m = _STD_TAG_RE.match(line, pos)
while lines and not lines[-1]: if not m:
lines.pop() plain_line += line[pos:]
break
pos = m.end()
cnt += 1
# Also avoid duplicating blank lines
if deduplicate or not plain_line:
if cnt > 0:
lines.append(plain_line)
else:
for _ in range(cnt):
lines.append(plain_line)
if deduplicate:
# Remove consecutive duplicates
deduped_lines = []
prev_line = None
for line in lines:
if line != prev_line:
deduped_lines.append(line)
prev_line = line
lines = deduped_lines
return "\n".join(lines) return "\n".join(lines)
@@ -211,7 +263,7 @@ def print_lyrics(
) -> None: ) -> None:
"""Print lyrics, optionally stripping tags. """Print lyrics, optionally stripping tags.
Assumes text has been normalized by normalize_tags. Assumes text has been normalized by normalize.
""" """
if plain: if plain:
print(to_plain(text)) print(to_plain(text))
+4 -1
View File
@@ -25,4 +25,7 @@ lrx = "lrx_cli.cli:run"
ignore = ["E402"] ignore = ["E402"]
[dependency-groups] [dependency-groups]
dev = ["ruff>=0.15.8"] dev = [
"pytest>=9.0.2",
"ruff>=0.15.8",
]
+184
View File
@@ -0,0 +1,184 @@
from __future__ import annotations
from lrx_cli.lrc import (
detect_sync_status,
is_synced,
normalize_tags,
normalize_unsynced,
to_plain,
)
from lrx_cli.models import CacheStatus
def test_normalize_tags_supports_all_raw_time_formats() -> None:
    """Each raw time-tag spelling is rewritten to the [mm:ss.cc] form."""
    # (raw input line, expected normalized line)
    cases = (
        ("[00:01]a", "[00:01.00]a"),
        ("[00:02.3]b", "[00:02.30]b"),
        ("[00:03.45]c", "[00:03.45]c"),
        ("[00:04.678]d", "[00:04.68]d"),
        ("[00:05:999]e", "[00:05.99]e"),
    )
    source = "\n".join(raw_line for raw_line, _ in cases)
    expected = "\n".join(normalized_line for _, normalized_line in cases)

    assert normalize_tags(source) == expected
def test_normalize_tags_keeps_non_timed_lines_trimmed_and_unchanged() -> None:
    """Lines without time tags only lose their surrounding whitespace."""
    source = " plain line \n\n [ar:Meta Header] "
    expected = "plain line\n\n[ar:Meta Header]"

    assert normalize_tags(source) == expected
def test_normalize_tags_removes_word_sync_patterns() -> None:
    """Word-level <...> sync tags are dropped from the lyric text."""
    word_synced = "".join(
        (
            "[00:01.00]<00:01>hello\n",
            "[00:02.00]<00:02.3>world\n",
            "[00:03.00]<00:03.45>foo\n",
            "[00:04.00]<00:04:678>bar\n",
            "[00:05.00]<1,2,3>baz",
        )
    )
    expected_lines = (
        "[00:01.00]hello",
        "[00:02.00]world",
        "[00:03.00]foo",
        "[00:04.00]bar",
        "[00:05.00]baz",
    )

    assert normalize_tags(word_synced) == "\n".join(expected_lines)
def test_normalize_tags_keeps_midline_timestamps_as_is() -> None:
    """A time tag in the middle of the lyric text is left untouched."""
    source = "[00:01.00]Lyric [00:02.00]line"
    expected = "[00:01.00]Lyric [00:02.00]line"

    assert normalize_tags(source) == expected
def test_normalize_tags_applies_positive_and_negative_offset_per_spec() -> None:
    """A positive offset moves tags earlier; a negative one moves them later."""
    shifted_earlier = normalize_tags("[offset:+1000]\n[00:10.00]line")
    shifted_later = normalize_tags("[offset:-500]\n[00:10.00]line")

    assert shifted_earlier == "[00:09.00]line"
    assert shifted_later == "[00:10.50]line"
def test_normalize_tags_accepts_leading_spaces_and_tabs_before_tags() -> None:
    """Whitespace (tab or space) before the first tag does not hide the tag."""
    source = "\t [00:01.2] hello"
    expected = "[00:01.20]hello"

    assert normalize_tags(source) == expected
def test_normalize_tags_handles_consecutive_start_tags_with_spaces_between() -> None:
    """Leading tags separated by spaces are all normalized and joined."""
    source = "[00:01] [00:02.3] chorus"
    expected = "[00:01.00][00:02.30]chorus"

    assert normalize_tags(source) == expected
def test_normalize_tags_preserves_non_leading_raw_like_tags() -> None:
    """A raw-looking tag that does not start the line is not rewritten."""
    source = "intro [00:01]line"
    expected = "intro [00:01]line"

    assert normalize_tags(source) == expected
def test_normalize_tags_removes_offset_tag_line_even_without_lyrics() -> None:
    """An input consisting only of an offset tag normalizes to an empty string."""
    source = "[offset:+500]"

    assert normalize_tags(source) == ""
def test_is_synced_and_detect_sync_status_follow_non_zero_rule() -> None:
    """Only text with at least one non-zero time tag counts as synced."""
    # (lyrics, expected is_synced result, expected detect_sync_status result)
    cases = (
        (
            "just some lyrics\nwithout tags",
            False,
            CacheStatus.SUCCESS_UNSYNCED,
        ),
        (
            "[00:00.00]a\n[00:00.00]b",
            False,
            CacheStatus.SUCCESS_UNSYNCED,
        ),
        (
            "[00:00.00]a\n[00:01.00]b",
            True,
            CacheStatus.SUCCESS_SYNCED,
        ),
    )

    for lyrics, expected_synced, expected_status in cases:
        assert is_synced(lyrics) is expected_synced
        assert detect_sync_status(lyrics) is expected_status
def test_normalize_unsynced_covers_documented_blank_and_tag_rules() -> None:
    """Leading blanks vanish; every remaining line gets a [00:00.00] tag."""
    source = "\n[00:12.34]first\nsecond\n\n[00:00.00]third"
    expected_lines = (
        "[00:00.00]first",
        "[00:00.00]second",
        "[00:00.00]",
        "[00:00.00]third",
    )

    assert normalize_unsynced(source) == "\n".join(expected_lines)
def test_to_plain_duplicates_lines_by_leading_repeated_timestamps() -> None:
    """Without deduplication, a line is emitted once per leading time tag."""
    tagged_lines = (
        "[00:01.00][00:02.00]hello",
        "[00:03.00]world",
        "no-tag-line",
        "[00:00.00]zero-only",
    )
    expected_lines = ("hello", "hello", "world", "zero-only")

    result = to_plain("\n".join(tagged_lines))

    # In synced mode, lines with standard tags are kept (including [00:00.00]),
    # while lines without leading standard tags are ignored.
    assert result == "\n".join(expected_lines)
def test_to_plain_deduplicate_collapses_only_consecutive_equals() -> None:
    """With deduplicate=True, only adjacent identical lines are merged."""
    tagged_lines = (
        "[00:01.00][00:02.00]hello",
        "[00:03.00]hello",
        "[00:04.00]",
        "[00:05.00]",
        "[00:06.00]world",
        "[00:07.00]hello",
    )
    expected_lines = ("hello", "", "world", "hello")

    result = to_plain("\n".join(tagged_lines), deduplicate=True)

    assert result == "\n".join(expected_lines)
def test_to_plain_fallback_for_non_synced_text_strips_start_tags() -> None:
    """For text that is not synced, leading tags are simply stripped."""
    unsynced_lines = ("[ar:Artist]", "[00:00.00]only-zero", "plain line")

    result = to_plain("\n".join(unsynced_lines))

    assert result == "only-zero\nplain line"
Generated
+48 -1
View File
@@ -129,6 +129,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" },
] ]
[[package]]
name = "iniconfig"
version = "2.3.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" },
]
[[package]] [[package]]
name = "loguru" name = "loguru"
version = "0.7.3" version = "0.7.3"
@@ -158,6 +167,7 @@ dependencies = [
[package.dev-dependencies] [package.dev-dependencies]
dev = [ dev = [
{ name = "pytest" },
{ name = "ruff" }, { name = "ruff" },
] ]
@@ -173,7 +183,10 @@ requires-dist = [
] ]
[package.metadata.requires-dev] [package.metadata.requires-dev]
dev = [{ name = "ruff", specifier = ">=0.15.8" }] dev = [
{ name = "pytest", specifier = ">=9.0.2" },
{ name = "ruff", specifier = ">=0.15.8" },
]
[[package]] [[package]]
name = "markdown-it-py" name = "markdown-it-py"
@@ -205,6 +218,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/b0/7a/620f945b96be1f6ee357d211d5bf74ab1b7fe72a9f1525aafbfe3aee6875/mutagen-1.47.0-py3-none-any.whl", hash = "sha256:edd96f50c5907a9539d8e5bba7245f62c9f520aef333d13392a79a4f70aca719", size = 194391, upload-time = "2023-09-03T16:33:29.955Z" }, { url = "https://files.pythonhosted.org/packages/b0/7a/620f945b96be1f6ee357d211d5bf74ab1b7fe72a9f1525aafbfe3aee6875/mutagen-1.47.0-py3-none-any.whl", hash = "sha256:edd96f50c5907a9539d8e5bba7245f62c9f520aef333d13392a79a4f70aca719", size = 194391, upload-time = "2023-09-03T16:33:29.955Z" },
] ]
[[package]]
name = "packaging"
version = "26.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416, upload-time = "2026-01-21T20:50:39.064Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" },
]
[[package]] [[package]]
name = "platformdirs" name = "platformdirs"
version = "4.9.4" version = "4.9.4"
@@ -214,6 +236,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/63/d7/97f7e3a6abb67d8080dd406fd4df842c2be0efaf712d1c899c32a075027c/platformdirs-4.9.4-py3-none-any.whl", hash = "sha256:68a9a4619a666ea6439f2ff250c12a853cd1cbd5158d258bd824a7df6be2f868", size = 21216, upload-time = "2026-03-05T18:34:12.172Z" }, { url = "https://files.pythonhosted.org/packages/63/d7/97f7e3a6abb67d8080dd406fd4df842c2be0efaf712d1c899c32a075027c/platformdirs-4.9.4-py3-none-any.whl", hash = "sha256:68a9a4619a666ea6439f2ff250c12a853cd1cbd5158d258bd824a7df6be2f868", size = 21216, upload-time = "2026-03-05T18:34:12.172Z" },
] ]
[[package]]
name = "pluggy"
version = "1.6.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
]
[[package]] [[package]]
name = "pygments" name = "pygments"
version = "2.19.2" version = "2.19.2"
@@ -223,6 +254,22 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
] ]
[[package]]
name = "pytest"
version = "9.0.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'win32'" },
{ name = "iniconfig" },
{ name = "packaging" },
{ name = "pluggy" },
{ name = "pygments" },
]
sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" },
]
[[package]] [[package]]
name = "python-dotenv" name = "python-dotenv"
version = "1.2.2" version = "1.2.2"