refactor: better (really?🤨) lrc parsing and handling

This commit is contained in:
2026-04-07 18:23:26 +02:00
parent 1414066eed
commit b922a0df28
6 changed files with 411 additions and 205 deletions
+1 -1
View File
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
[project]
name = "lrx-cli"
version = "0.6.1"
version = "0.6.2"
description = "Fetch line-synced lyrics for your music player."
readme = "README.md"
requires-python = ">=3.13"
+2 -2
View File
@@ -123,7 +123,7 @@ def fetch(
logger.error("No lyrics found.")
sys.exit(1)
print(result.lyrics.to_lrc(plain=plain))
print(result.lyrics.to_text(plain=plain))
# search
@@ -214,7 +214,7 @@ def search(
logger.error("No lyrics found.")
sys.exit(1)
print(result.lyrics.to_lrc(plain=plain))
print(result.lyrics.to_text(plain=plain))
# export
+266 -167
View File
@@ -1,9 +1,11 @@
"""
Author: Uyanide pywang0608@foxmail.com
Date: 2026-03-25 21:54:01
Description: Shared LRC time-tag utilities (definitely overengineered).
Description: LRC parsing, modeling, and serialization helpers.
"""
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
import re
from pathlib import Path
from typing import Optional
@@ -15,27 +17,18 @@ from .models import CacheStatus
# [mm:ss], [mm:ss.c], [mm:ss.cc], [mm:ss.ccc], [mm:ss:cc], …
_RAW_TAG_RE = re.compile(r"\[(\d{2,}):(\d{2})(?:[.:](\d{1,3}))?\]")
# Standard format after normalization: [mm:ss.cc]
# _STD_TAG_RE = re.compile(r"\[\d{2,}:\d{2}\.\d{2}\]")
# Standard format with capture groups
_STD_TAG_CAPTURE_RE = re.compile(r"\[(\d{2,}):(\d{2})\.(\d{2})\]")
# [offset:+/-xxx] tag — value in milliseconds
_OFFSET_RE = re.compile(r"^\[offset:\s*([+-]?\d+)\]\s*$", re.MULTILINE | re.IGNORECASE)
# Any number of ID/Time tags at the start of a line
# One or more leading bracket tags at line start.
# Used to strip start tags in plain-mode fallback.
_LINE_START_TAGS_RE = re.compile(r"^(?:\[[^\]]*\])+", re.MULTILINE)
# Any number of standard time tags at the start of a line
_LINE_START_STD_TAGS_RE = re.compile(r"^(?:\[\d{2,}:\d{2}\.\d{2}\])+", re.MULTILINE)
# Timed word-sync tags: <mm:ss>, <mm:ss.c>, <mm:ss.cc>, <mm:ss:cc>
_WORD_SYNC_TAG_RE = re.compile(r"<(\d{2,}):(\d{2})(?:[.:](\d{1,3}))?>")
# Word-level sync tags
# <mm:ss>, <mm:ss.c>, <mm:ss.cc>, <mm:ss:cc>, <xx,yy,zz>
_WORD_SYNC_TAG_RE = re.compile(r"<\d{2,}:\d{2}(?:[.:]\d{1,3})?>|<\d+,\d+,\d+>")
# A single doc-level tag line: [key:value].
# Disallow nested [] in value so multi-tag lines are not treated as doc tags.
_DOC_TAG_RE = re.compile(r"^\[([^:\]\[]+):([^\[\]]*)\]$")
# QRC is totally a completely different matter. Since they are still providing standard LRC APIs,
# it might be a good idea to leave this mess to the future :)
# QRC uses a different format and is intentionally out of scope here.
def _remove_pattern(text: str, pattern: re.Pattern) -> str:
@@ -58,170 +51,282 @@ def _raw_tag_to_ms(mm: str, ss: str, frac: Optional[str]) -> int:
return (int(mm) * 60 + int(ss)) * 1000 + ms
def _raw_tag_to_cs(mm: str, ss: str, frac: Optional[str]) -> str:
"""Convert parsed time tag components to standard [mm:ss.cc] string."""
if frac is None:
ms = 0
else:
# cc in [mm:ss:cc] is also treated as centiseconds, per LRC spec
# ^
# why does this format even exist, idk
n = len(frac)
if n == 1:
ms = int(frac) * 100
elif n == 2:
ms = int(frac) * 10
else:
ms = int(frac)
cs = min(round(ms / 10), 99)
return f"[{mm}:{ss}.{cs:02d}]"
def _ms_to_std_tag(total_ms: int) -> str:
mm = max(0, total_ms) // 60000
ss = (max(0, total_ms) % 60000) // 1000
cs = min(round((max(0, total_ms) % 1000) / 10), 99)
return f"[{mm:02d}:{ss:02d}.{cs:02d}]"
def _sanitize_lyric_text(text: str) -> str:
"""Remove possibly word-sync time tags in lyric
def _ms_to_word_tag(total_ms: int) -> str:
mm = max(0, total_ms) // 60000
ss = (max(0, total_ms) % 60000) // 1000
cs = min(round((max(0, total_ms) % 1000) / 10), 99)
return f"<{mm:02d}:{ss:02d}.{cs:02d}>"
Assumes the normal line-sync time tags are already stripped.
@dataclass(frozen=True)
class LrcWordSegment:
text: str
time_ms: Optional[int] = None
duration_ms: Optional[int] = None
class BaseLine(ABC):
"""Common line interface for rendering and text extraction."""
@property
@abstractmethod
def text(self) -> str:
"""Return plain text content for this line."""
@abstractmethod
def to_text(self, include_word_sync: bool) -> str:
"""Return full serialized line text."""
@abstractmethod
def to_plain_unsynced(self) -> Optional[str]:
"""Return this line's plain-text contribution in unsynced mode."""
@abstractmethod
def timed_plain_entries(self) -> list[tuple[int, str]]:
"""Return (timestamp_ms, text) entries for synced plain-mode output."""
def has_nonzero_timestamp(self) -> bool:
return any(ts > 0 for ts, _ in self.timed_plain_entries())
@dataclass
class DocTagLine(BaseLine):
"""Represents a single doc tag line like [ar:Artist]."""
key: str
value: str
@property
def text(self) -> str:
return f"[{self.key}:{self.value}]"
def to_text(self, include_word_sync: bool) -> str:
return self.text
def to_plain_unsynced(self) -> Optional[str]:
return None
def timed_plain_entries(self) -> list[tuple[int, str]]:
return []
@dataclass
class LyricLine(BaseLine):
"""Lyric line with optional line-level timestamps."""
line_times_ms: list[int] = field(default_factory=list)
words: list[LrcWordSegment] = field(default_factory=list)
@property
def text(self) -> str:
return "".join(seg.text for seg in self.words)
def to_text(self, include_word_sync: bool) -> str:
prefix = "".join(_ms_to_std_tag(ms) for ms in self.line_times_ms)
return prefix + self.text
def to_plain_unsynced(self) -> Optional[str]:
return _remove_pattern(self.text, _LINE_START_TAGS_RE)
def timed_plain_entries(self) -> list[tuple[int, str]]:
return [(tag_ms, self.text) for tag_ms in self.line_times_ms]
@dataclass
class WordSyncLyricLine(LyricLine):
"""Lyric line that can render per-word sync tags when requested."""
def to_text(self, include_word_sync: bool) -> str:
prefix = "".join(_ms_to_std_tag(ms) for ms in self.line_times_ms)
if not include_word_sync:
return prefix + self.text
parts: list[str] = []
for seg in self.words:
if seg.time_ms is not None:
parts.append(_ms_to_word_tag(seg.time_ms))
parts.append(seg.text)
return prefix + "".join(parts)
def _split_trimmed_lines(text: str) -> list[str]:
"""Split text into lines, strip each line, and drop outer blank lines."""
lines = [line.strip() for line in text.splitlines()]
while lines and not lines[0].strip():
lines.pop(0)
while lines and not lines[-1].strip():
lines.pop()
return lines
def _extract_leading_line_tags(line: str) -> tuple[list[int], str]:
"""Parse leading line-sync tags and return (times_ms, lyric_part).
Spaces between consecutive leading tags are dropped. If non-space text
appears, parsing of leading tags stops and the remainder is lyric text.
"""
return _remove_pattern(text, _WORD_SYNC_TAG_RE)
pos = 0
tags_ms: list[int] = []
while True:
m = _RAW_TAG_RE.match(line, pos)
if not m:
break
tags_ms.append(_raw_tag_to_ms(m.group(1), m.group(2), m.group(3)))
pos = m.end()
# Allow spaces only between consecutive leading tags.
# We only check for '[' here; the next loop decides whether it is a valid time tag.
scan = pos
while scan < len(line) and line[scan].isspace():
scan += 1
if scan < len(line) and line[scan] == "[":
pos = scan
continue
pos = scan
break
return tags_ms, line[pos:]
def _reformat(text: str) -> list[str]:
"""Parse each line and reformat to standard [mm:ss.cc]...content form.
def _parse_word_segments(lyric_part: str) -> tuple[list[LrcWordSegment], bool]:
"""Parse timed word-sync tags while preserving all lyric text exactly."""
segments: list[LrcWordSegment] = []
cursor = 0
current_time: Optional[int] = None
has_word_sync = False
Handles any mix of time tag formats on input. Lines with no time tags
are stripped of leading/trailing whitespace and passed through unchanged.
"""
out: list[str] = []
for line in text.splitlines():
line = line.strip()
pos = 0
tags: list[str] = []
while True:
while pos < len(line) and line[pos].isspace():
pos += 1
m = _RAW_TAG_RE.match(line, pos)
# Non-time tags are passed through as-is, except for leading/trailing whitespace which is stripped.
if not m:
# No more tags on this line
break
tags.append(_raw_tag_to_cs(m.group(1), m.group(2), m.group(3)))
pos = m.end()
if tags:
# This could break lyric lines of some kind of word-synced LRC format, e.g.
# [00:01.00]Lyric [00:02.00]line
# but such formats were not planned to be supported in the first place, so…
out.append(_sanitize_lyric_text("".join(tags) + line[pos:]))
else:
out.append(line)
# Empty lines with no tags are also preserved
for m in _WORD_SYNC_TAG_RE.finditer(lyric_part):
piece = lyric_part[cursor : m.start()]
if piece:
segments.append(LrcWordSegment(text=piece, time_ms=current_time))
current_time = _raw_tag_to_ms(m.group(1), m.group(2), m.group(3))
has_word_sync = True
cursor = m.end()
# Remove empty lines at the start and end of the whole text, but preserve blank lines in the middle
while out and not out[0].strip():
out.pop(0)
while out and not out[-1].strip():
out.pop()
tail = lyric_part[cursor:]
if tail or not segments:
segments.append(
LrcWordSegment(
text=tail,
time_ms=current_time if has_word_sync else None,
)
)
return segments, has_word_sync
return out
def _is_single_doc_tag_line(line: str) -> Optional[tuple[str, str]]:
"""Return (key, value) only for standalone single doc-tag lines."""
if _RAW_TAG_RE.fullmatch(line):
return None
m = _DOC_TAG_RE.fullmatch(line)
if not m:
return None
key = m.group(1).strip()
value = m.group(2).strip()
return key, value
class LRCData:
_lines: list[str]
_lines: list[BaseLine]
_doc_tags: dict[str, str]
def __init__(self, text: str | None = None) -> None:
def __init__(self, text: Optional[str] = None) -> None:
self._doc_tags = {}
if not text:
self._lines = []
return
self._lines = _reformat(text)
self._apply_offset()
raw_lines = _split_trimmed_lines(text)
parsed: list[BaseLine] = []
for raw in raw_lines:
maybe_tag = _is_single_doc_tag_line(raw)
if maybe_tag is not None:
key, value = maybe_tag
self._doc_tags[key] = value
parsed.append(DocTagLine(key=key, value=value))
continue
tags_ms, lyric_part = _extract_leading_line_tags(raw)
words, has_word_sync = _parse_word_segments(lyric_part if tags_ms else raw)
if has_word_sync:
parsed.append(WordSyncLyricLine(line_times_ms=tags_ms, words=words))
else:
parsed.append(LyricLine(line_times_ms=tags_ms, words=words))
self._lines = parsed
def __str__(self) -> str:
return "\n".join(self._lines)
return self.to_text(plain=False, include_word_sync=False)
def __repr__(self) -> str:
return f"LRCData(lines={self._lines!r})"
def __bool__(self) -> bool:
return len(self._lines) > 0
return f"LRCData(doc_tags={self._doc_tags!r}, lines={self._lines!r})"
def __len__(self) -> int:
return len(self._lines)
def _apply_offset(self):
"""Parse [offset:±ms] and shift all standard [mm:ss.cc] tags accordingly.
@property
def tags(self) -> dict[str, str]:
return self._doc_tags
Per LRC spec, positive offset = lyrics appear sooner (subtract from timestamps).
"""
m: Optional[re.Match] = None
for i, line in enumerate(self._lines):
m = _OFFSET_RE.search(line)
if m:
self._lines.pop(i)
break
if not m:
return
offset_ms = int(m.group(1))
if offset_ms == 0:
return
def _shift(match: re.Match) -> str:
total_ms = max(
0,
(int(match.group(1)) * 60 + int(match.group(2))) * 1000
+ int(match.group(3)) * 10
- offset_ms,
)
new_mm = total_ms // 60000
new_ss = (total_ms % 60000) // 1000
new_cs = min(round((total_ms % 1000) / 10), 99)
return f"[{new_mm:02d}:{new_ss:02d}.{new_cs:02d}]"
self._lines = [_STD_TAG_CAPTURE_RE.sub(_shift, line) for line in self._lines]
@property
def lines(self) -> list[BaseLine]:
return self._lines
def is_synced(self) -> bool:
"""Check whether text contains non-zero LRC time tags.
Assumes text has been normalized by normalize (standard [mm:ss.cc] format).
"""
for line in self._lines:
for m in _STD_TAG_CAPTURE_RE.finditer(line):
if m.group(1) != "00" or m.group(2) != "00" or m.group(3) != "00":
return True
return False
"""Return True if any lyric line contains a non-zero line timestamp."""
return any(line.has_nonzero_timestamp() for line in self._lines)
def detect_sync_status(self) -> CacheStatus:
"""Determine whether lyrics contain meaningful LRC time tags.
Assumes text has been normalized by normalize.
"""
"""Map sync detection result to cache status."""
return (
CacheStatus.SUCCESS_SYNCED
if self.is_synced()
else CacheStatus.SUCCESS_UNSYNCED
)
def normalize_unsynced(self):
"""Normalize unsynced lyrics so every line has a [00:00.00] tag.
def normalize_unsynced(self) -> "LRCData":
"""Convert lyrics into unsynced LRC form with [00:00.00] tags.
Assumes lyrics have been normalized by normalize.
- Lines that already have time tags: replace with [00:00.00]
- Lines without leading tags: prepend [00:00.00]
- Blank lines in middle are converted to [00:00.00]
- Leading blank lyric lines are skipped.
- Middle blank lyric lines are preserved as empty synced lines.
- Doc-tag lines are preserved unchanged.
"""
out: list[str] = []
out: list[BaseLine] = []
first = True
for i, line in enumerate(self._lines):
stripped = line.strip()
for line in self._lines:
if isinstance(line, DocTagLine):
out.append(DocTagLine(key=line.key, value=line.value))
continue
assert isinstance(line, LyricLine)
stripped = line.text.strip()
if not stripped and not first:
out.append("[00:00.00]")
out.append(
LyricLine(line_times_ms=[0], words=[LrcWordSegment(text="")])
)
continue
elif not stripped:
# Skip leading blank lines
continue
first = False
cleaned = _remove_pattern(line, _LINE_START_STD_TAGS_RE)
out.append(f"[00:00.00]{cleaned}")
out.append(
LyricLine(
line_times_ms=[0],
words=[LrcWordSegment(text=line.text)],
)
)
ret = LRCData()
ret._lines = out
ret._doc_tags = dict(self._doc_tags)
return ret
def to_plain(
@@ -230,32 +335,22 @@ class LRCData:
) -> str:
"""Convert lyrics to plain text with all tags stripped.
If deduplicate is True, only keep the first line of consecutive lines with the same lyric text (after stripping tags).
Otherwise, lines with multiple time tags will be duplicated as many times as the number of tags.
Assumes text has been normalized by normalize.
If synced, output is sorted by line timestamp and duplicated for multi-tag lines.
If not synced, leading bracket tags are stripped per line and original order is kept.
If deduplicate is True, only consecutive duplicate plain lines are collapsed.
"""
if not self.is_synced():
return "\n".join(
_remove_pattern(line, _LINE_START_TAGS_RE) for line in self._lines
).strip("\n")
plain_lines = [
text
for text in (line.to_plain_unsynced() for line in self._lines)
if text is not None
]
return "\n".join(plain_lines).strip("\n")
tagged_lines = []
tagged_lines: list[tuple[int, str]] = []
for line in self._lines:
pos = 0
tag_ms = []
while True:
# Only match strictly repeated standard time tags at the start of the line
# Lines without any time tags are ignored.
# Lyric lines are considered already stripped of whitespaces, so no strips here.
m = _STD_TAG_CAPTURE_RE.match(line, pos)
if not m:
lyric = line[pos:]
for tag in tag_ms:
tagged_lines.append((tag, lyric))
break
tag_ms.append(_raw_tag_to_ms(m.group(1), m.group(2), m.group(3)))
pos = m.end()
tagged_lines.extend(line.timed_plain_entries())
sorted_lines = [lyric for _, lyric in sorted(tagged_lines, key=lambda x: x[0])]
@@ -271,23 +366,27 @@ class LRCData:
return "\n".join(sorted_lines).strip()
def to_unsynced(self):
def to_unsynced(self) -> "LRCData":
"""Return a plain-text based unsynced representation."""
return LRCData(self.to_plain())
def to_lrc(
def to_text(
self,
plain: bool = False,
include_word_sync: bool = False,
) -> str:
"""Return lyrics, optionally stripping tags.
"""Serialize to LRC text or plain text.
Assumes text has been normalized by normalize.
- plain=True returns to_plain().
- include_word_sync controls rendering of per-word tags for word-sync lines.
"""
ret = self
if not self.is_synced():
ret = self.normalize_unsynced()
if plain:
return ret.to_plain()
return "\n".join(ret._lines)
return self.to_plain(deduplicate=False)
lines: list[str] = [
line.to_text(include_word_sync=include_word_sync) for line in self._lines
]
return "\n".join(lines)
def get_audio_path(audio_url: str, ensure_exists: bool = False) -> Optional[Path]:
+2 -2
View File
@@ -80,7 +80,7 @@ def test_cache_search_fetcher_with_fuzzy_metadata(
assert result is not None
assert result.lyrics is not None
assert result.lyrics.to_lrc() == expected_lrc
assert result.lyrics.to_text() == expected_lrc
def test_cache_search_fetcher_prefer_better_match(lrc_manager: LrcManager):
@@ -97,7 +97,7 @@ def test_cache_search_fetcher_prefer_better_match(lrc_manager: LrcManager):
assert result is not None
assert result.lyrics is not None
assert result.lyrics.to_lrc() == "[00:00.01]artist modified"
assert result.lyrics.to_text() == "[00:00.01]artist modified"
@pytest.mark.network
+139 -32
View File
@@ -1,6 +1,11 @@
from __future__ import annotations
from lrx_cli.lrc import LRCData
from lrx_cli.lrc import (
LRCData,
DocTagLine,
LyricLine,
WordSyncLyricLine,
)
from lrx_cli.models import CacheStatus
@@ -8,7 +13,7 @@ def _normalize(text: str) -> str:
return str(LRCData(text))
def test_normalize_tags_supports_all_raw_time_formats() -> None:
def test_time_tag_formats_are_normalized() -> None:
raw = "\n".join(
[
"[00:01]a",
@@ -32,37 +37,27 @@ def test_normalize_tags_supports_all_raw_time_formats() -> None:
)
def test_normalize_tags_keeps_non_timed_lines_trimmed_and_unchanged() -> None:
raw = " plain line \n\n [ar:Meta Header] "
def test_non_timed_lines_are_kept_as_lyrics() -> None:
raw = " plain line \n\n other line "
normalized = _normalize(raw)
assert normalized == "plain line\n\n[ar:Meta Header]"
assert normalized == "plain line\n\nother line"
def test_normalize_tags_removes_word_sync_patterns() -> None:
raw = (
"[00:01.00]<00:01>hello\n"
"[00:02.00]<00:02.3>world\n"
"[00:03.00]<00:03.45>foo\n"
"[00:04.00]<00:04:678>bar\n"
"[00:05.00]<1,2,3>baz"
)
def test_word_sync_tags_are_parsed_and_export_controlled() -> None:
raw = "[00:01.00]<00:01>he <00:01.50>llo\n[00:02.00]plain"
normalized = _normalize(raw)
data = LRCData(raw)
assert normalized == "\n".join(
[
"[00:01.00]hello",
"[00:02.00]world",
"[00:03.00]foo",
"[00:04.00]bar",
"[00:05.00]baz",
]
assert data.to_text(include_word_sync=False) == "[00:01.00]he llo\n[00:02.00]plain"
assert (
data.to_text(include_word_sync=True)
== "[00:01.00]<00:01.00>he <00:01.50>llo\n[00:02.00]plain"
)
def test_normalize_tags_keeps_midline_timestamps_as_is() -> None:
def test_midline_line_tags_are_kept_as_plain_text() -> None:
raw = "[00:01.00]Lyric [00:02.00]line"
normalized = _normalize(raw)
@@ -74,11 +69,11 @@ def test_normalize_tags_applies_positive_and_negative_offset_per_spec() -> None:
positive = _normalize("[offset:+1000]\n[00:10.00]line")
negative = _normalize("[offset:-500]\n[00:10.00]line")
assert positive == "[00:09.00]line"
assert negative == "[00:10.50]line"
assert positive == "[offset:+1000]\n[00:10.00]line"
assert negative == "[offset:-500]\n[00:10.00]line"
def test_normalize_tags_accepts_leading_spaces_and_tabs_before_tags() -> None:
def test_leading_spaces_before_first_time_tag_are_trimmed() -> None:
raw = "\t [00:01.2] hello"
normalized = _normalize(raw)
@@ -89,12 +84,14 @@ def test_normalize_tags_accepts_leading_spaces_and_tabs_before_tags() -> None:
def test_normalize_tags_handles_consecutive_start_tags_with_spaces_between() -> None:
raw = "[00:01] [00:02.3] chorus"
normalized = _normalize(raw)
assert normalized == "[00:01.00][00:02.30]chorus"
data = LRCData(raw)
assert len(data.lines) == 1
assert isinstance(data.lines[0], LyricLine)
assert data.lines[0].line_times_ms == [1000, 2300]
assert data.lines[0].text == "chorus"
def test_normalize_tags_preserves_non_leading_raw_like_tags() -> None:
def test_non_leading_time_like_text_is_plain_lyric() -> None:
raw = "intro [00:01]line"
normalized = _normalize(raw)
@@ -107,7 +104,7 @@ def test_normalize_tags_removes_offset_tag_line_even_without_lyrics() -> None:
normalized = _normalize(raw)
assert normalized == ""
assert normalized == "[offset:+500]"
def test_is_synced_and_detect_sync_status_follow_non_zero_rule() -> None:
@@ -140,7 +137,7 @@ def test_normalize_unsynced_covers_documented_blank_and_tag_rules() -> None:
)
def test_to_plain_duplicates_lines_by_leading_repeated_timestamps() -> None:
def test_to_plain_duplicates_lines_for_multi_line_times() -> None:
text = "\n".join(
[
"[00:02.00][00:01.00]hello",
@@ -210,3 +207,113 @@ def test_reformat_pipeline_trims_outer_blanks_and_preserves_inner_blanks() -> No
normalized = str(LRCData(text))
assert normalized == "[00:01.00]a\n\n[00:02.00]b"
def test_single_doc_tag_line_is_not_added_to_lines() -> None:
data = LRCData("[ar:Artist]\n[00:01.00]line")
assert data.tags == {"ar": "Artist"}
assert len(data.lines) == 2
assert isinstance(data.lines[0], DocTagLine)
assert isinstance(data.lines[1], LyricLine)
assert data.lines[1].text == "line"
def test_multiple_doc_tags_on_one_line_are_plain_lyrics() -> None:
data = LRCData("[ar:Artist][ti:Song]")
assert data.tags == {}
assert len(data.lines) == 1
assert data.lines[0].text == "[ar:Artist][ti:Song]"
def test_doc_tag_after_lyrics_is_treated_as_lyrics() -> None:
data = LRCData("[00:01.00]line\n[ar:Artist]")
assert data.tags == {"ar": "Artist"}
assert len(data.lines) == 2
assert isinstance(data.lines[1], DocTagLine)
assert data.lines[1].text == "[ar:Artist]"
def test_unknown_lines_before_lyrics_are_preserved_and_do_not_start_lyrics() -> None:
data = LRCData("comment line\n[ar:Artist]\n[00:01.00]line")
assert data.tags == {"ar": "Artist"}
assert len(data.lines) == 3
assert isinstance(data.lines[0], LyricLine)
assert isinstance(data.lines[1], DocTagLine)
assert data.lines[2].text == "line"
assert str(data).startswith("comment line\n[ar:Artist]\n")
def test_to_plain_excludes_doc_tags_but_keeps_lyrics() -> None:
data = LRCData("[ar:Artist]\n[00:01.00]line\n[ti:Song]\nplain")
assert data.to_plain() == "line"
def test_non_space_between_line_tags_stops_tag_parsing() -> None:
data = LRCData("[00:01.00]x[00:02.00]tail")
assert len(data.lines) == 1
assert isinstance(data.lines[0], LyricLine)
assert data.lines[0].line_times_ms == [1000]
assert data.lines[0].text == "x[00:02.00]tail"
def test_line_only_time_tag_is_valid_empty_lyric() -> None:
data = LRCData("[00:01.00]")
assert len(data.lines) == 1
assert isinstance(data.lines[0], LyricLine)
assert data.lines[0].line_times_ms == [1000]
assert data.lines[0].text == ""
def test_model_uses_subclass_for_word_sync_lines() -> None:
a = LRCData("[00:01.00]<00:00.50>lyric")
b = LRCData("[00:01.00]lyric")
assert isinstance(a.lines[0], WordSyncLyricLine)
assert isinstance(b.lines[0], LyricLine)
assert not isinstance(b.lines[0], WordSyncLyricLine)
def test_word_sync_line_with_empty_tail_keeps_word_tag_only_when_enabled() -> None:
data = LRCData("[00:01.00]<00:02.00>")
assert isinstance(data.lines[0], WordSyncLyricLine)
assert data.to_text(include_word_sync=False) == "[00:01.00]"
assert data.to_text(include_word_sync=True) == "[00:01.00]<00:02.00>"
def test_to_text_plain_true_matches_to_plain_output() -> None:
data = LRCData("[00:02.00]b\n[00:01.00]a")
assert data.to_text(plain=True) == data.to_plain()
def test_to_unsynced_converts_to_plain_based_unsynced_data() -> None:
data = LRCData("[ar:Artist]\n[00:02.00]b\n[00:01.00]a")
unsynced = data.to_unsynced()
assert isinstance(unsynced, LRCData)
assert str(unsynced) == "a\nb"
def test_duplicate_doc_tag_key_last_value_wins_but_lines_are_kept() -> None:
data = LRCData("[ar:First]\n[ar:Second]\n[00:01.00]line")
assert data.tags == {"ar": "Second"}
assert len(data.lines) == 3
assert isinstance(data.lines[0], DocTagLine)
assert isinstance(data.lines[1], DocTagLine)
assert str(data).startswith("[ar:First]\n[ar:Second]\n")
def test_to_plain_for_doc_only_text_is_empty() -> None:
data = LRCData("[ar:Artist]\n[ti:Song]")
assert data.to_plain() == ""
Generated
+1 -1
View File
@@ -153,7 +153,7 @@ wheels = [
[[package]]
name = "lrx-cli"
version = "0.6.1"
version = "0.6.2"
source = { editable = "." }
dependencies = [
{ name = "cyclopts" },