Compare commits

...

2 Commits

Author SHA1 Message Date
Uyanide b922a0df28 refactor: better (really?🤨) lrc parsing and handling 2026-04-07 19:33:17 +02:00
Uyanide 1414066eed chore: de-markdown-lize comments 2026-04-07 19:33:17 +02:00
9 changed files with 418 additions and 212 deletions
+1 -1
View File
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
[project] [project]
name = "lrx-cli" name = "lrx-cli"
version = "0.6.1" version = "0.6.2"
description = "Fetch line-synced lyrics for your music player." description = "Fetch line-synced lyrics for your music player."
readme = "README.md" readme = "README.md"
requires-python = ">=3.13" requires-python = ">=3.13"
+5 -5
View File
@@ -29,7 +29,7 @@ _ALL_SLOTS = (SLOT_SYNCED, SLOT_UNSYNCED)
# Fixed WHERE clause for exact track matching. Column names are hardcoded # Fixed WHERE clause for exact track matching. Column names are hardcoded
# literals; only the *values* come from user-supplied params — no injection risk. # literals; only the values come from user-supplied params — no injection risk.
_TRACK_WHERE = ( _TRACK_WHERE = (
"(? IS NULL OR artist = ?) AND " "(? IS NULL OR artist = ?) AND "
"(? IS NULL OR title = ?) AND " "(? IS NULL OR title = ?) AND "
@@ -249,7 +249,7 @@ class CacheEngine:
# Read # Read
def get_all(self, track: TrackMeta, source: str) -> list[LyricResult]: def get_all(self, track: TrackMeta, source: str) -> list[LyricResult]:
"""Return all non-expired cached slot rows for *track*/*source*.""" """Return all non-expired cached slot rows for track/source."""
try: try:
key = _generate_key(track, source) key = _generate_key(track, source)
except ValueError: except ValueError:
@@ -430,7 +430,7 @@ class CacheEngine:
def find_best_positive( def find_best_positive(
self, track: TrackMeta, status: CacheStatus self, track: TrackMeta, status: CacheStatus
) -> Optional[LyricResult]: ) -> Optional[LyricResult]:
"""Find the best positive (synced/unsynced) cache entry for *track*. """Find the best positive (synced/unsynced) cache entry for track.
Uses exact metadata match (artist + title + album) across all sources. Uses exact metadata match (artist + title + album) across all sources.
Returns the highest-confidence entry, or None. Returns the highest-confidence entry, or None.
@@ -488,7 +488,7 @@ class CacheEngine:
making hard artist filtering unreliable for cross-language queries. making hard artist filtering unreliable for cross-language queries.
Ignores artist, album and source. Only returns positive results Ignores artist, album and source. Only returns positive results
(synced/unsynced) that have not expired. When *length* is provided, (synced/unsynced) that have not expired. When length is provided,
filters by duration tolerance and sorts by closest match. filters by duration tolerance and sorts by closest match.
""" """
if not title: if not title:
@@ -551,7 +551,7 @@ class CacheEngine:
confidence: float, confidence: float,
source: str, source: str,
) -> int: ) -> int:
"""Update confidence for a specific source's cache entry matching *track*. """Update confidence for a specific source's cache entry matching track.
Returns the number of rows updated. Returns the number of rows updated.
""" """
+2 -2
View File
@@ -123,7 +123,7 @@ def fetch(
logger.error("No lyrics found.") logger.error("No lyrics found.")
sys.exit(1) sys.exit(1)
print(result.lyrics.to_lrc(plain=plain)) print(result.lyrics.to_text(plain=plain))
# search # search
@@ -214,7 +214,7 @@ def search(
logger.error("No lyrics found.") logger.error("No lyrics found.")
sys.exit(1) sys.exit(1)
print(result.lyrics.to_lrc(plain=plain)) print(result.lyrics.to_text(plain=plain))
# export # export
+1 -1
View File
@@ -275,7 +275,7 @@ class LrcManager:
bypass_cache: bool = False, bypass_cache: bool = False,
allow_unsynced: bool = False, allow_unsynced: bool = False,
) -> Optional[LyricResult]: ) -> Optional[LyricResult]:
"""Fetch lyrics for *track* using the group-based parallel pipeline.""" """Fetch lyrics for track using the group-based parallel pipeline."""
return asyncio.run( return asyncio.run(
self._fetch_for_track( self._fetch_for_track(
track, track,
+266 -167
View File
@@ -1,9 +1,11 @@
""" """
Author: Uyanide pywang0608@foxmail.com Author: Uyanide pywang0608@foxmail.com
Date: 2026-03-25 21:54:01 Date: 2026-03-25 21:54:01
Description: Shared LRC time-tag utilities (definitely overengineered). Description: LRC parsing, modeling, and serialization helpers.
""" """
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
import re import re
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
@@ -15,27 +17,18 @@ from .models import CacheStatus
# [mm:ss], [mm:ss.c], [mm:ss.cc], [mm:ss.ccc], [mm:ss:cc], … # [mm:ss], [mm:ss.c], [mm:ss.cc], [mm:ss.ccc], [mm:ss:cc], …
_RAW_TAG_RE = re.compile(r"\[(\d{2,}):(\d{2})(?:[.:](\d{1,3}))?\]") _RAW_TAG_RE = re.compile(r"\[(\d{2,}):(\d{2})(?:[.:](\d{1,3}))?\]")
# Standard format after normalization: [mm:ss.cc] # One or more leading bracket tags at line start.
# _STD_TAG_RE = re.compile(r"\[\d{2,}:\d{2}\.\d{2}\]") # Used to strip start tags in plain-mode fallback.
# Standard format with capture groups
_STD_TAG_CAPTURE_RE = re.compile(r"\[(\d{2,}):(\d{2})\.(\d{2})\]")
# [offset:+/-xxx] tag — value in milliseconds
_OFFSET_RE = re.compile(r"^\[offset:\s*([+-]?\d+)\]\s*$", re.MULTILINE | re.IGNORECASE)
# Any number of ID/Time tags at the start of a line
_LINE_START_TAGS_RE = re.compile(r"^(?:\[[^\]]*\])+", re.MULTILINE) _LINE_START_TAGS_RE = re.compile(r"^(?:\[[^\]]*\])+", re.MULTILINE)
# Any number of standard time tags at the start of a line # Timed word-sync tags: <mm:ss>, <mm:ss.c>, <mm:ss.cc>, <mm:ss:cc>
_LINE_START_STD_TAGS_RE = re.compile(r"^(?:\[\d{2,}:\d{2}\.\d{2}\])+", re.MULTILINE) _WORD_SYNC_TAG_RE = re.compile(r"<(\d{2,}):(\d{2})(?:[.:](\d{1,3}))?>")
# Word-level sync tags # A single doc-level tag line: [key:value].
# <mm:ss>, <mm:ss.c>, <mm:ss.cc>, <mm:ss:cc>, <xx,yy,zz> # Disallow nested [] in value so multi-tag lines are not treated as doc tags.
_WORD_SYNC_TAG_RE = re.compile(r"<\d{2,}:\d{2}(?:[.:]\d{1,3})?>|<\d+,\d+,\d+>") _DOC_TAG_RE = re.compile(r"^\[([^:\]\[]+):([^\[\]]*)\]$")
# QRC is totally a completely different matter. Since they are still providing standard LRC APIs, # QRC uses a different format and is intentionally out of scope here.
# it might be a good idea to leave this mass to the future :)
def _remove_pattern(text: str, pattern: re.Pattern) -> str: def _remove_pattern(text: str, pattern: re.Pattern) -> str:
@@ -58,170 +51,282 @@ def _raw_tag_to_ms(mm: str, ss: str, frac: Optional[str]) -> int:
return (int(mm) * 60 + int(ss)) * 1000 + ms return (int(mm) * 60 + int(ss)) * 1000 + ms
def _raw_tag_to_cs(mm: str, ss: str, frac: Optional[str]) -> str: def _ms_to_std_tag(total_ms: int) -> str:
"""Convert parsed time tag components to standard [mm:ss.cc] string.""" mm = max(0, total_ms) // 60000
if frac is None: ss = (max(0, total_ms) % 60000) // 1000
ms = 0 cs = min(round((max(0, total_ms) % 1000) / 10), 99)
else: return f"[{mm:02d}:{ss:02d}.{cs:02d}]"
# cc in [mm:ss:cc] is also treated as centiseconds, per LRC spec
# ^
# why does this format even exist, idk
n = len(frac)
if n == 1:
ms = int(frac) * 100
elif n == 2:
ms = int(frac) * 10
else:
ms = int(frac)
cs = min(round(ms / 10), 99)
return f"[{mm}:{ss}.{cs:02d}]"
def _sanitize_lyric_text(text: str) -> str: def _ms_to_word_tag(total_ms: int) -> str:
"""Remove possibly word-sync time tags in lyric mm = max(0, total_ms) // 60000
ss = (max(0, total_ms) % 60000) // 1000
cs = min(round((max(0, total_ms) % 1000) / 10), 99)
return f"<{mm:02d}:{ss:02d}.{cs:02d}>"
Assumes the normal line-sync time tags are already stripped.
# Immutable piece of lyric text inside one line, optionally carrying word-level timing.
@dataclass(frozen=True)
class LrcWordSegment:
# Literal lyric text of this segment; may be the empty string.
text: str
# Time in milliseconds taken from the word-sync tag preceding this text; None when untimed.
time_ms: Optional[int] = None
# NOTE(review): nothing in the visible parser assigns this field — confirm intended use.
duration_ms: Optional[int] = None
class BaseLine(ABC):
"""Common line interface for rendering and text extraction."""
@property
@abstractmethod
def text(self) -> str:
"""Return plain text content for this line."""
@abstractmethod
def to_text(self, include_word_sync: bool) -> str:
"""Return full serialized line text."""
# include_word_sync: whether lines that carry per-word tags should render them.
@abstractmethod
def to_plain_unsynced(self) -> Optional[str]:
"""Return this line's plain-text contribution in unsynced mode."""
# A None return means the line contributes nothing (LRCData.to_plain filters None out).
@abstractmethod
def timed_plain_entries(self) -> list[tuple[int, str]]:
"""Return (timestamp_ms, text) entries for synced plain-mode output."""
# Concrete helper shared by all subclasses: a line counts as "synced" only if at least
# one of its timestamps is strictly greater than zero, so [00:00.00]-only lines do not count.
def has_nonzero_timestamp(self) -> bool:
return any(ts > 0 for ts, _ in self.timed_plain_entries())
@dataclass
class DocTagLine(BaseLine):
"""Represents a single doc tag line like [ar:Artist]."""
# Tag name (the part before the first colon), e.g. "ar".
key: str
# Tag value (the part after the first colon), e.g. "Artist".
value: str
@property
def text(self) -> str:
# Re-serialize in canonical [key:value] form.
return f"[{self.key}:{self.value}]"
def to_text(self, include_word_sync: bool) -> str:
# Doc tags have no word-level timing, so include_word_sync is irrelevant here.
return self.text
def to_plain_unsynced(self) -> Optional[str]:
# None: doc tags are metadata and are left out of plain-text output.
return None
def timed_plain_entries(self) -> list[tuple[int, str]]:
# No timestamps, so a doc tag never contributes to synced plain output or sync detection.
return []
@dataclass
class LyricLine(BaseLine):
"""Lyric line with optional line-level timestamps."""
# Line-level timestamps in milliseconds, in source order; empty for an untimed line.
line_times_ms: list[int] = field(default_factory=list)
# Text segments making up the lyric; their texts concatenate to the full line text.
words: list[LrcWordSegment] = field(default_factory=list)
@property
def text(self) -> str:
# Joined without separators: segments keep their own whitespace.
return "".join(seg.text for seg in self.words)
def to_text(self, include_word_sync: bool) -> str:
# include_word_sync is ignored for this class; only WordSyncLyricLine renders word tags.
# Each timestamp becomes one leading tag produced by _ms_to_std_tag.
prefix = "".join(_ms_to_std_tag(ms) for ms in self.line_times_ms)
return prefix + self.text
def to_plain_unsynced(self) -> Optional[str]:
# Presumably strips any run of leading [..] bracket groups still present in the text
# (behaviour of _remove_pattern is not shown here — verify against its definition).
return _remove_pattern(self.text, _LINE_START_TAGS_RE)
def timed_plain_entries(self) -> list[tuple[int, str]]:
# One (timestamp, text) pair per line timestamp, so multi-tag lines repeat their text;
# an untimed line yields no entries at all.
return [(tag_ms, self.text) for tag_ms in self.line_times_ms]
@dataclass
class WordSyncLyricLine(LyricLine):
"""Lyric line that can render per-word sync tags when requested."""
def to_text(self, include_word_sync: bool) -> str:
# Leading line-level tags are rendered the same way as in LyricLine.
prefix = "".join(_ms_to_std_tag(ms) for ms in self.line_times_ms)
if not include_word_sync:
# Word tags suppressed: identical output to a plain LyricLine.
return prefix + self.text
parts: list[str] = []
for seg in self.words:
# A word tag is emitted only for segments that carry a time; untimed text is passed through.
if seg.time_ms is not None:
parts.append(_ms_to_word_tag(seg.time_ms))
parts.append(seg.text)
return prefix + "".join(parts)
def _split_trimmed_lines(text: str) -> list[str]:
"""Split text into lines, strip each line, and drop outer blank lines."""
lines = [line.strip() for line in text.splitlines()]
while lines and not lines[0].strip():
lines.pop(0)
while lines and not lines[-1].strip():
lines.pop()
return lines
def _extract_leading_line_tags(line: str) -> tuple[list[int], str]:
"""Parse leading line-sync tags and return (times_ms, lyric_part).
Spaces between consecutive leading tags are dropped. If non-space text
appears, parsing of leading tags stops and the remainder is lyric text.
""" """
return _remove_pattern(text, _WORD_SYNC_TAG_RE) pos = 0
tags_ms: list[int] = []
while True:
m = _RAW_TAG_RE.match(line, pos)
if not m:
break
tags_ms.append(_raw_tag_to_ms(m.group(1), m.group(2), m.group(3)))
pos = m.end()
# Allow spaces only between consecutive leading tags.
# We only check for '[' here; the next loop decides whether it is a valid time tag.
scan = pos
while scan < len(line) and line[scan].isspace():
scan += 1
if scan < len(line) and line[scan] == "[":
pos = scan
continue
pos = scan
break
return tags_ms, line[pos:]
def _reformat(text: str) -> list[str]: def _parse_word_segments(lyric_part: str) -> tuple[list[LrcWordSegment], bool]:
"""Parse each line and reformat to standard [mm:ss.cc]...content form. """Parse timed word-sync tags while preserving all lyric text exactly."""
segments: list[LrcWordSegment] = []
cursor = 0
current_time: Optional[int] = None
has_word_sync = False
Handles any mix of time tag formats on input. Lines with no time tags for m in _WORD_SYNC_TAG_RE.finditer(lyric_part):
are stripped of leading/trailing whitespace and passed through unchanged. piece = lyric_part[cursor : m.start()]
""" if piece:
out: list[str] = [] segments.append(LrcWordSegment(text=piece, time_ms=current_time))
for line in text.splitlines(): current_time = _raw_tag_to_ms(m.group(1), m.group(2), m.group(3))
line = line.strip() has_word_sync = True
pos = 0 cursor = m.end()
tags: list[str] = []
while True:
while pos < len(line) and line[pos].isspace():
pos += 1
m = _RAW_TAG_RE.match(line, pos)
# Non-time tags are passed through as-is, except for leading/trailing whitespace which is stripped.
if not m:
# No more tags on this line
break
tags.append(_raw_tag_to_cs(m.group(1), m.group(2), m.group(3)))
pos = m.end()
if tags:
# This could break lyric lines of some kind of word-synced LRC format, e.g.
# [00:01.00]Lyric [00:02.00]line
# but such format were not planned to be supported in the first place, so…
out.append(_sanitize_lyric_text("".join(tags) + line[pos:]))
else:
out.append(line)
# Empty lines with no tags are also preserved
# Remove empty lines at the start and end of the whole text, but preserve blank lines in the middle tail = lyric_part[cursor:]
while out and not out[0].strip(): if tail or not segments:
out.pop(0) segments.append(
while out and not out[-1].strip(): LrcWordSegment(
out.pop() text=tail,
time_ms=current_time if has_word_sync else None,
)
)
return segments, has_word_sync
return out
def _is_single_doc_tag_line(line: str) -> Optional[tuple[str, str]]:
"""Return (key, value) only for standalone single doc-tag lines."""
# A bare time tag such as [00:01.00] also has the [key:value] shape, so rule it out
# first; otherwise an empty timed line would be misread as a doc tag.
if _RAW_TAG_RE.fullmatch(line):
return None
# _DOC_TAG_RE forbids nested brackets, so lines like [ar:A][ti:B] do not match.
m = _DOC_TAG_RE.fullmatch(line)
if not m:
return None
# Surrounding whitespace inside the brackets is not significant.
key = m.group(1).strip()
value = m.group(2).strip()
return key, value
class LRCData: class LRCData:
_lines: list[str] _lines: list[BaseLine]
_doc_tags: dict[str, str]
def __init__(self, text: str | None = None) -> None: def __init__(self, text: Optional[str] = None) -> None:
self._doc_tags = {}
if not text: if not text:
self._lines = [] self._lines = []
return return
self._lines = _reformat(text)
self._apply_offset() raw_lines = _split_trimmed_lines(text)
parsed: list[BaseLine] = []
for raw in raw_lines:
maybe_tag = _is_single_doc_tag_line(raw)
if maybe_tag is not None:
key, value = maybe_tag
self._doc_tags[key] = value
parsed.append(DocTagLine(key=key, value=value))
continue
tags_ms, lyric_part = _extract_leading_line_tags(raw)
words, has_word_sync = _parse_word_segments(lyric_part if tags_ms else raw)
if has_word_sync:
parsed.append(WordSyncLyricLine(line_times_ms=tags_ms, words=words))
else:
parsed.append(LyricLine(line_times_ms=tags_ms, words=words))
self._lines = parsed
def __str__(self) -> str: def __str__(self) -> str:
return "\n".join(self._lines) return self.to_text(plain=False, include_word_sync=False)
def __repr__(self) -> str: def __repr__(self) -> str:
return f"LRCData(lines={self._lines!r})" return f"LRCData(doc_tags={self._doc_tags!r}, lines={self._lines!r})"
def __bool__(self) -> bool:
return len(self._lines) > 0
def __len__(self) -> int: def __len__(self) -> int:
return len(self._lines) return len(self._lines)
def _apply_offset(self): @property
"""Parse [offset:±ms] and shift all standard [mm:ss.cc] tags accordingly. def tags(self) -> dict[str, str]:
return self._doc_tags
Per LRC spec, positive offset = lyrics appear sooner (subtract from timestamps). @property
""" def lines(self) -> list[BaseLine]:
m: Optional[re.Match] = None return self._lines
for i, line in enumerate(self._lines):
m = _OFFSET_RE.search(line)
if m:
self._lines.pop(i)
break
if not m:
return
offset_ms = int(m.group(1))
if offset_ms == 0:
return
def _shift(match: re.Match) -> str:
total_ms = max(
0,
(int(match.group(1)) * 60 + int(match.group(2))) * 1000
+ int(match.group(3)) * 10
- offset_ms,
)
new_mm = total_ms // 60000
new_ss = (total_ms % 60000) // 1000
new_cs = min(round((total_ms % 1000) / 10), 99)
return f"[{new_mm:02d}:{new_ss:02d}.{new_cs:02d}]"
self._lines = [_STD_TAG_CAPTURE_RE.sub(_shift, line) for line in self._lines]
def is_synced(self) -> bool: def is_synced(self) -> bool:
"""Check whether text contains non-zero LRC time tags. """Return True if any lyric line contains a non-zero line timestamp."""
return any(line.has_nonzero_timestamp() for line in self._lines)
Assumes text has been normalized by normalize (standard [mm:ss.cc] format).
"""
for line in self._lines:
for m in _STD_TAG_CAPTURE_RE.finditer(line):
if m.group(1) != "00" or m.group(2) != "00" or m.group(3) != "00":
return True
return False
def detect_sync_status(self) -> CacheStatus: def detect_sync_status(self) -> CacheStatus:
"""Determine whether lyrics contain meaningful LRC time tags. """Map sync detection result to cache status."""
Assumes text has been normalized by normalize.
"""
return ( return (
CacheStatus.SUCCESS_SYNCED CacheStatus.SUCCESS_SYNCED
if self.is_synced() if self.is_synced()
else CacheStatus.SUCCESS_UNSYNCED else CacheStatus.SUCCESS_UNSYNCED
) )
def normalize_unsynced(self): def normalize_unsynced(self) -> "LRCData":
"""Normalize unsynced lyrics so every line has a [00:00.00] tag. """Convert lyrics into unsynced LRC form with [00:00.00] tags.
Assumes lyrics have been normalized by normalize. - Leading blank lyric lines are skipped.
- Lines that already have time tags: replace with [00:00.00] - Middle blank lyric lines are preserved as empty synced lines.
- Lines without leading tags: prepend [00:00.00] - Doc-tag lines are preserved unchanged.
- Blank lines in middle are converted to [00:00.00]
""" """
out: list[str] = [] out: list[BaseLine] = []
first = True first = True
for i, line in enumerate(self._lines): for line in self._lines:
stripped = line.strip() if isinstance(line, DocTagLine):
out.append(DocTagLine(key=line.key, value=line.value))
continue
assert isinstance(line, LyricLine)
stripped = line.text.strip()
if not stripped and not first: if not stripped and not first:
out.append("[00:00.00]") out.append(
LyricLine(line_times_ms=[0], words=[LrcWordSegment(text="")])
)
continue continue
elif not stripped: elif not stripped:
# Skip leading blank lines
continue continue
first = False first = False
cleaned = _remove_pattern(line, _LINE_START_STD_TAGS_RE) out.append(
out.append(f"[00:00.00]{cleaned}") LyricLine(
line_times_ms=[0],
words=[LrcWordSegment(text=line.text)],
)
)
ret = LRCData() ret = LRCData()
ret._lines = out ret._lines = out
ret._doc_tags = dict(self._doc_tags)
return ret return ret
def to_plain( def to_plain(
@@ -230,32 +335,22 @@ class LRCData:
) -> str: ) -> str:
"""Convert lyrics to plain text with all tags stripped. """Convert lyrics to plain text with all tags stripped.
If deduplicate is True, only keep the first line of consecutive lines with the same lyric text (after stripping tags). If synced, output is sorted by line timestamp and duplicated for multi-tag lines.
Otherwise, lines with multiple time tags will be duplicated as many times as the number of tags. If not synced, leading bracket tags are stripped per line and original order is kept.
Assumes text has been normalized by normalize. If deduplicate is True, only consecutive duplicate plain lines are collapsed.
""" """
if not self.is_synced(): if not self.is_synced():
return "\n".join( plain_lines = [
_remove_pattern(line, _LINE_START_TAGS_RE) for line in self._lines text
).strip("\n") for text in (line.to_plain_unsynced() for line in self._lines)
if text is not None
]
return "\n".join(plain_lines).strip("\n")
tagged_lines = [] tagged_lines: list[tuple[int, str]] = []
for line in self._lines: for line in self._lines:
pos = 0 tagged_lines.extend(line.timed_plain_entries())
tag_ms = []
while True:
# Only match strictly repeated standard time tags at the start of the line
# Lines without any time tags are ignored.
# Lyric lines are considered already stripped of whitespaces, so no strips here.
m = _STD_TAG_CAPTURE_RE.match(line, pos)
if not m:
lyric = line[pos:]
for tag in tag_ms:
tagged_lines.append((tag, lyric))
break
tag_ms.append(_raw_tag_to_ms(m.group(1), m.group(2), m.group(3)))
pos = m.end()
sorted_lines = [lyric for _, lyric in sorted(tagged_lines, key=lambda x: x[0])] sorted_lines = [lyric for _, lyric in sorted(tagged_lines, key=lambda x: x[0])]
@@ -271,23 +366,27 @@ class LRCData:
return "\n".join(sorted_lines).strip() return "\n".join(sorted_lines).strip()
def to_unsynced(self): def to_unsynced(self) -> "LRCData":
"""Return a plain-text based unsynced representation."""
return LRCData(self.to_plain()) return LRCData(self.to_plain())
def to_lrc( def to_text(
self, self,
plain: bool = False, plain: bool = False,
include_word_sync: bool = False,
) -> str: ) -> str:
"""Return lyrics, optionally stripping tags. """Serialize to LRC text or plain text.
Assumes text has been normalized by normalize. - plain=True returns to_plain().
- include_word_sync controls rendering of per-word tags for word-sync lines.
""" """
ret = self
if not self.is_synced():
ret = self.normalize_unsynced()
if plain: if plain:
return ret.to_plain() return self.to_plain(deduplicate=False)
return "\n".join(ret._lines)
lines: list[str] = [
line.to_text(include_word_sync=include_word_sync) for line in self._lines
]
return "\n".join(lines)
def get_audio_path(audio_url: str, ensure_exists: bool = False) -> Optional[Path]: def get_audio_path(audio_url: str, ensure_exists: bool = False) -> Optional[Path]:
+1 -1
View File
@@ -21,7 +21,7 @@ def is_better_result(
*, *,
allow_unsynced: bool, allow_unsynced: bool,
) -> bool: ) -> bool:
"""Return True when *new* should rank above *old*. """Return True when new should rank above old.
Ordering rules (highest first): Ordering rules (highest first):
1) Positive statuses always beat negative statuses. 1) Positive statuses always beat negative statuses.
+2 -2
View File
@@ -80,7 +80,7 @@ def test_cache_search_fetcher_with_fuzzy_metadata(
assert result is not None assert result is not None
assert result.lyrics is not None assert result.lyrics is not None
assert result.lyrics.to_lrc() == expected_lrc assert result.lyrics.to_text() == expected_lrc
def test_cache_search_fetcher_prefer_better_match(lrc_manager: LrcManager): def test_cache_search_fetcher_prefer_better_match(lrc_manager: LrcManager):
@@ -97,7 +97,7 @@ def test_cache_search_fetcher_prefer_better_match(lrc_manager: LrcManager):
assert result is not None assert result is not None
assert result.lyrics is not None assert result.lyrics is not None
assert result.lyrics.to_lrc() == "[00:00.01]artist modified" assert result.lyrics.to_text() == "[00:00.01]artist modified"
@pytest.mark.network @pytest.mark.network
+139 -32
View File
@@ -1,6 +1,11 @@
from __future__ import annotations from __future__ import annotations
from lrx_cli.lrc import LRCData from lrx_cli.lrc import (
LRCData,
DocTagLine,
LyricLine,
WordSyncLyricLine,
)
from lrx_cli.models import CacheStatus from lrx_cli.models import CacheStatus
@@ -8,7 +13,7 @@ def _normalize(text: str) -> str:
return str(LRCData(text)) return str(LRCData(text))
def test_normalize_tags_supports_all_raw_time_formats() -> None: def test_time_tag_formats_are_normalized() -> None:
raw = "\n".join( raw = "\n".join(
[ [
"[00:01]a", "[00:01]a",
@@ -32,37 +37,27 @@ def test_normalize_tags_supports_all_raw_time_formats() -> None:
) )
def test_normalize_tags_keeps_non_timed_lines_trimmed_and_unchanged() -> None: def test_non_timed_lines_are_kept_as_lyrics() -> None:
raw = " plain line \n\n [ar:Meta Header] " raw = " plain line \n\n other line "
normalized = _normalize(raw) normalized = _normalize(raw)
assert normalized == "plain line\n\n[ar:Meta Header]" assert normalized == "plain line\n\nother line"
def test_normalize_tags_removes_word_sync_patterns() -> None: def test_word_sync_tags_are_parsed_and_export_controlled() -> None:
raw = ( raw = "[00:01.00]<00:01>he <00:01.50>llo\n[00:02.00]plain"
"[00:01.00]<00:01>hello\n"
"[00:02.00]<00:02.3>world\n"
"[00:03.00]<00:03.45>foo\n"
"[00:04.00]<00:04:678>bar\n"
"[00:05.00]<1,2,3>baz"
)
normalized = _normalize(raw) data = LRCData(raw)
assert normalized == "\n".join( assert data.to_text(include_word_sync=False) == "[00:01.00]he llo\n[00:02.00]plain"
[ assert (
"[00:01.00]hello", data.to_text(include_word_sync=True)
"[00:02.00]world", == "[00:01.00]<00:01.00>he <00:01.50>llo\n[00:02.00]plain"
"[00:03.00]foo",
"[00:04.00]bar",
"[00:05.00]baz",
]
) )
def test_normalize_tags_keeps_midline_timestamps_as_is() -> None: def test_midline_line_tags_are_kept_as_plain_text() -> None:
raw = "[00:01.00]Lyric [00:02.00]line" raw = "[00:01.00]Lyric [00:02.00]line"
normalized = _normalize(raw) normalized = _normalize(raw)
@@ -74,11 +69,11 @@ def test_normalize_tags_applies_positive_and_negative_offset_per_spec() -> None:
positive = _normalize("[offset:+1000]\n[00:10.00]line") positive = _normalize("[offset:+1000]\n[00:10.00]line")
negative = _normalize("[offset:-500]\n[00:10.00]line") negative = _normalize("[offset:-500]\n[00:10.00]line")
assert positive == "[00:09.00]line" assert positive == "[offset:+1000]\n[00:10.00]line"
assert negative == "[00:10.50]line" assert negative == "[offset:-500]\n[00:10.00]line"
def test_normalize_tags_accepts_leading_spaces_and_tabs_before_tags() -> None: def test_leading_spaces_before_first_time_tag_are_trimmed() -> None:
raw = "\t [00:01.2] hello" raw = "\t [00:01.2] hello"
normalized = _normalize(raw) normalized = _normalize(raw)
@@ -89,12 +84,14 @@ def test_normalize_tags_accepts_leading_spaces_and_tabs_before_tags() -> None:
def test_normalize_tags_handles_consecutive_start_tags_with_spaces_between() -> None: def test_normalize_tags_handles_consecutive_start_tags_with_spaces_between() -> None:
raw = "[00:01] [00:02.3] chorus" raw = "[00:01] [00:02.3] chorus"
normalized = _normalize(raw) data = LRCData(raw)
assert len(data.lines) == 1
assert normalized == "[00:01.00][00:02.30]chorus" assert isinstance(data.lines[0], LyricLine)
assert data.lines[0].line_times_ms == [1000, 2300]
assert data.lines[0].text == "chorus"
def test_normalize_tags_preserves_non_leading_raw_like_tags() -> None: def test_non_leading_time_like_text_is_plain_lyric() -> None:
raw = "intro [00:01]line" raw = "intro [00:01]line"
normalized = _normalize(raw) normalized = _normalize(raw)
@@ -107,7 +104,7 @@ def test_normalize_tags_removes_offset_tag_line_even_without_lyrics() -> None:
normalized = _normalize(raw) normalized = _normalize(raw)
assert normalized == "" assert normalized == "[offset:+500]"
def test_is_synced_and_detect_sync_status_follow_non_zero_rule() -> None: def test_is_synced_and_detect_sync_status_follow_non_zero_rule() -> None:
@@ -140,7 +137,7 @@ def test_normalize_unsynced_covers_documented_blank_and_tag_rules() -> None:
) )
def test_to_plain_duplicates_lines_by_leading_repeated_timestamps() -> None: def test_to_plain_duplicates_lines_for_multi_line_times() -> None:
text = "\n".join( text = "\n".join(
[ [
"[00:02.00][00:01.00]hello", "[00:02.00][00:01.00]hello",
@@ -210,3 +207,113 @@ def test_reformat_pipeline_trims_outer_blanks_and_preserves_inner_blanks() -> No
normalized = str(LRCData(text)) normalized = str(LRCData(text))
assert normalized == "[00:01.00]a\n\n[00:02.00]b" assert normalized == "[00:01.00]a\n\n[00:02.00]b"
# NOTE(review): the name says the doc tag is "not added to lines", but the assertions
# require it to be present in data.lines as a DocTagLine (2 lines total) in addition to
# data.tags — the name looks stale relative to the behaviour being pinned.
def test_single_doc_tag_line_is_not_added_to_lines() -> None:
data = LRCData("[ar:Artist]\n[00:01.00]line")
assert data.tags == {"ar": "Artist"}
assert len(data.lines) == 2
assert isinstance(data.lines[0], DocTagLine)
assert isinstance(data.lines[1], LyricLine)
assert data.lines[1].text == "line"
# Two bracket groups on one line do not form a standalone doc tag, so nothing is
# registered in tags and the raw text is kept as a lyric line.
def test_multiple_doc_tags_on_one_line_are_plain_lyrics() -> None:
data = LRCData("[ar:Artist][ti:Song]")
assert data.tags == {}
assert len(data.lines) == 1
assert data.lines[0].text == "[ar:Artist][ti:Song]"
# NOTE(review): the name says "treated as lyrics", but the assertions expect the trailing
# [ar:Artist] to be parsed as a DocTagLine and recorded in data.tags — i.e. doc tags are
# recognised regardless of position. The name contradicts the asserted behaviour.
def test_doc_tag_after_lyrics_is_treated_as_lyrics() -> None:
data = LRCData("[00:01.00]line\n[ar:Artist]")
assert data.tags == {"ar": "Artist"}
assert len(data.lines) == 2
assert isinstance(data.lines[1], DocTagLine)
assert data.lines[1].text == "[ar:Artist]"
# Free text ahead of the header is kept as an untimed LyricLine, the doc tag after it is
# still recognised, and serialization preserves the original line order.
def test_unknown_lines_before_lyrics_are_preserved_and_do_not_start_lyrics() -> None:
data = LRCData("comment line\n[ar:Artist]\n[00:01.00]line")
assert data.tags == {"ar": "Artist"}
assert len(data.lines) == 3
assert isinstance(data.lines[0], LyricLine)
assert isinstance(data.lines[1], DocTagLine)
assert data.lines[2].text == "line"
assert str(data).startswith("comment line\n[ar:Artist]\n")
# The input is synced (one non-zero timestamp), so to_plain only emits timed entries:
# doc tags and the untimed "plain" line have no timestamps and are therefore dropped.
def test_to_plain_excludes_doc_tags_but_keeps_lyrics() -> None:
data = LRCData("[ar:Artist]\n[00:01.00]line\n[ti:Song]\nplain")
assert data.to_plain() == "line"
# Only whitespace may separate leading time tags; the "x" ends tag parsing, so the second
# bracket group stays in the lyric text verbatim.
def test_non_space_between_line_tags_stops_tag_parsing() -> None:
data = LRCData("[00:01.00]x[00:02.00]tail")
assert len(data.lines) == 1
assert isinstance(data.lines[0], LyricLine)
assert data.lines[0].line_times_ms == [1000]
assert data.lines[0].text == "x[00:02.00]tail"
# A bare time tag must become a timed lyric line with empty text, not a doc tag,
# even though it has the [key:value] shape.
def test_line_only_time_tag_is_valid_empty_lyric() -> None:
data = LRCData("[00:01.00]")
assert len(data.lines) == 1
assert isinstance(data.lines[0], LyricLine)
assert data.lines[0].line_times_ms == [1000]
assert data.lines[0].text == ""
# Lines containing a <mm:ss.cc> word tag are modelled as WordSyncLyricLine; lines without
# one use the base LyricLine exactly (checked with the negative isinstance).
def test_model_uses_subclass_for_word_sync_lines() -> None:
a = LRCData("[00:01.00]<00:00.50>lyric")
b = LRCData("[00:01.00]lyric")
assert isinstance(a.lines[0], WordSyncLyricLine)
assert isinstance(b.lines[0], LyricLine)
assert not isinstance(b.lines[0], WordSyncLyricLine)
# A word tag followed by no text still yields a (timed, empty) segment, so the tag
# round-trips when include_word_sync=True and disappears when it is False.
def test_word_sync_line_with_empty_tail_keeps_word_tag_only_when_enabled() -> None:
data = LRCData("[00:01.00]<00:02.00>")
assert isinstance(data.lines[0], WordSyncLyricLine)
assert data.to_text(include_word_sync=False) == "[00:01.00]"
assert data.to_text(include_word_sync=True) == "[00:01.00]<00:02.00>"
# to_text(plain=True) is expected to be equivalent to calling to_plain() with defaults.
def test_to_text_plain_true_matches_to_plain_output() -> None:
data = LRCData("[00:02.00]b\n[00:01.00]a")
assert data.to_text(plain=True) == data.to_plain()
# to_unsynced re-parses the to_plain() output, so the result is time-sorted ("a" before
# "b"), carries no time tags, and loses the doc tag.
def test_to_unsynced_converts_to_plain_based_unsynced_data() -> None:
data = LRCData("[ar:Artist]\n[00:02.00]b\n[00:01.00]a")
unsynced = data.to_unsynced()
assert isinstance(unsynced, LRCData)
assert str(unsynced) == "a\nb"
# The tags dict keeps only the last value for a repeated key, while the line list
# (and therefore serialization) still contains every original doc-tag line.
def test_duplicate_doc_tag_key_last_value_wins_but_lines_are_kept() -> None:
data = LRCData("[ar:First]\n[ar:Second]\n[00:01.00]line")
assert data.tags == {"ar": "Second"}
assert len(data.lines) == 3
assert isinstance(data.lines[0], DocTagLine)
assert isinstance(data.lines[1], DocTagLine)
assert str(data).startswith("[ar:First]\n[ar:Second]\n")
# With no timestamps the unsynced path is taken; doc-tag lines contribute nothing there,
# so the plain output is the empty string.
def test_to_plain_for_doc_only_text_is_empty() -> None:
data = LRCData("[ar:Artist]\n[ti:Song]")
assert data.to_plain() == ""
Generated
+1 -1
View File
@@ -153,7 +153,7 @@ wheels = [
[[package]] [[package]]
name = "lrx-cli" name = "lrx-cli"
version = "0.6.1" version = "0.6.2"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
{ name = "cyclopts" }, { name = "cyclopts" },