Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
b922a0df28
|
|||
|
1414066eed
|
+1
-1
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "lrx-cli"
|
name = "lrx-cli"
|
||||||
version = "0.6.1"
|
version = "0.6.2"
|
||||||
description = "Fetch line-synced lyrics for your music player."
|
description = "Fetch line-synced lyrics for your music player."
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.13"
|
requires-python = ">=3.13"
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ _ALL_SLOTS = (SLOT_SYNCED, SLOT_UNSYNCED)
|
|||||||
|
|
||||||
|
|
||||||
# Fixed WHERE clause for exact track matching. Column names are hardcoded
|
# Fixed WHERE clause for exact track matching. Column names are hardcoded
|
||||||
# literals; only the *values* come from user-supplied params — no injection risk.
|
# literals; only the values come from user-supplied params — no injection risk.
|
||||||
_TRACK_WHERE = (
|
_TRACK_WHERE = (
|
||||||
"(? IS NULL OR artist = ?) AND "
|
"(? IS NULL OR artist = ?) AND "
|
||||||
"(? IS NULL OR title = ?) AND "
|
"(? IS NULL OR title = ?) AND "
|
||||||
@@ -249,7 +249,7 @@ class CacheEngine:
|
|||||||
# Read
|
# Read
|
||||||
|
|
||||||
def get_all(self, track: TrackMeta, source: str) -> list[LyricResult]:
|
def get_all(self, track: TrackMeta, source: str) -> list[LyricResult]:
|
||||||
"""Return all non-expired cached slot rows for *track*/*source*."""
|
"""Return all non-expired cached slot rows for track/source."""
|
||||||
try:
|
try:
|
||||||
key = _generate_key(track, source)
|
key = _generate_key(track, source)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
@@ -430,7 +430,7 @@ class CacheEngine:
|
|||||||
def find_best_positive(
|
def find_best_positive(
|
||||||
self, track: TrackMeta, status: CacheStatus
|
self, track: TrackMeta, status: CacheStatus
|
||||||
) -> Optional[LyricResult]:
|
) -> Optional[LyricResult]:
|
||||||
"""Find the best positive (synced/unsynced) cache entry for *track*.
|
"""Find the best positive (synced/unsynced) cache entry for track.
|
||||||
|
|
||||||
Uses exact metadata match (artist + title + album) across all sources.
|
Uses exact metadata match (artist + title + album) across all sources.
|
||||||
Returns the highest-confidence entry, or None.
|
Returns the highest-confidence entry, or None.
|
||||||
@@ -488,7 +488,7 @@ class CacheEngine:
|
|||||||
making hard artist filtering unreliable for cross-language queries.
|
making hard artist filtering unreliable for cross-language queries.
|
||||||
|
|
||||||
Ignores artist, album and source. Only returns positive results
|
Ignores artist, album and source. Only returns positive results
|
||||||
(synced/unsynced) that have not expired. When *length* is provided,
|
(synced/unsynced) that have not expired. When length is provided,
|
||||||
filters by duration tolerance and sorts by closest match.
|
filters by duration tolerance and sorts by closest match.
|
||||||
"""
|
"""
|
||||||
if not title:
|
if not title:
|
||||||
@@ -551,7 +551,7 @@ class CacheEngine:
|
|||||||
confidence: float,
|
confidence: float,
|
||||||
source: str,
|
source: str,
|
||||||
) -> int:
|
) -> int:
|
||||||
"""Update confidence for a specific source's cache entry matching *track*.
|
"""Update confidence for a specific source's cache entry matching track.
|
||||||
|
|
||||||
Returns the number of rows updated.
|
Returns the number of rows updated.
|
||||||
"""
|
"""
|
||||||
|
|||||||
+2
-2
@@ -123,7 +123,7 @@ def fetch(
|
|||||||
logger.error("No lyrics found.")
|
logger.error("No lyrics found.")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
print(result.lyrics.to_lrc(plain=plain))
|
print(result.lyrics.to_text(plain=plain))
|
||||||
|
|
||||||
|
|
||||||
# search
|
# search
|
||||||
@@ -214,7 +214,7 @@ def search(
|
|||||||
logger.error("No lyrics found.")
|
logger.error("No lyrics found.")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
print(result.lyrics.to_lrc(plain=plain))
|
print(result.lyrics.to_text(plain=plain))
|
||||||
|
|
||||||
|
|
||||||
# export
|
# export
|
||||||
|
|||||||
+1
-1
@@ -275,7 +275,7 @@ class LrcManager:
|
|||||||
bypass_cache: bool = False,
|
bypass_cache: bool = False,
|
||||||
allow_unsynced: bool = False,
|
allow_unsynced: bool = False,
|
||||||
) -> Optional[LyricResult]:
|
) -> Optional[LyricResult]:
|
||||||
"""Fetch lyrics for *track* using the group-based parallel pipeline."""
|
"""Fetch lyrics for track using the group-based parallel pipeline."""
|
||||||
return asyncio.run(
|
return asyncio.run(
|
||||||
self._fetch_for_track(
|
self._fetch_for_track(
|
||||||
track,
|
track,
|
||||||
|
|||||||
+266
-167
@@ -1,9 +1,11 @@
|
|||||||
"""
|
"""
|
||||||
Author: Uyanide pywang0608@foxmail.com
|
Author: Uyanide pywang0608@foxmail.com
|
||||||
Date: 2026-03-25 21:54:01
|
Date: 2026-03-25 21:54:01
|
||||||
Description: Shared LRC time-tag utilities (definitely overengineered).
|
Description: LRC parsing, modeling, and serialization helpers.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from dataclasses import dataclass, field
|
||||||
import re
|
import re
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
@@ -15,27 +17,18 @@ from .models import CacheStatus
|
|||||||
# [mm:ss], [mm:ss.c], [mm:ss.cc], [mm:ss.ccc], [mm:ss:cc], …
|
# [mm:ss], [mm:ss.c], [mm:ss.cc], [mm:ss.ccc], [mm:ss:cc], …
|
||||||
_RAW_TAG_RE = re.compile(r"\[(\d{2,}):(\d{2})(?:[.:](\d{1,3}))?\]")
|
_RAW_TAG_RE = re.compile(r"\[(\d{2,}):(\d{2})(?:[.:](\d{1,3}))?\]")
|
||||||
|
|
||||||
# Standard format after normalization: [mm:ss.cc]
|
# One or more leading bracket tags at line start.
|
||||||
# _STD_TAG_RE = re.compile(r"\[\d{2,}:\d{2}\.\d{2}\]")
|
# Used to strip start tags in plain-mode fallback.
|
||||||
|
|
||||||
# Standard format with capture groups
|
|
||||||
_STD_TAG_CAPTURE_RE = re.compile(r"\[(\d{2,}):(\d{2})\.(\d{2})\]")
|
|
||||||
|
|
||||||
# [offset:+/-xxx] tag — value in milliseconds
|
|
||||||
_OFFSET_RE = re.compile(r"^\[offset:\s*([+-]?\d+)\]\s*$", re.MULTILINE | re.IGNORECASE)
|
|
||||||
|
|
||||||
# Any number of ID/Time tags at the start of a line
|
|
||||||
_LINE_START_TAGS_RE = re.compile(r"^(?:\[[^\]]*\])+", re.MULTILINE)
|
_LINE_START_TAGS_RE = re.compile(r"^(?:\[[^\]]*\])+", re.MULTILINE)
|
||||||
|
|
||||||
# Any number of standard time tags at the start of a line
|
# Timed word-sync tags: <mm:ss>, <mm:ss.c>, <mm:ss.cc>, <mm:ss:cc>
|
||||||
_LINE_START_STD_TAGS_RE = re.compile(r"^(?:\[\d{2,}:\d{2}\.\d{2}\])+", re.MULTILINE)
|
_WORD_SYNC_TAG_RE = re.compile(r"<(\d{2,}):(\d{2})(?:[.:](\d{1,3}))?>")
|
||||||
|
|
||||||
# Word-level sync tags
|
# A single doc-level tag line: [key:value].
|
||||||
# <mm:ss>, <mm:ss.c>, <mm:ss.cc>, <mm:ss:cc>, <xx,yy,zz>
|
# Disallow nested [] in value so multi-tag lines are not treated as doc tags.
|
||||||
_WORD_SYNC_TAG_RE = re.compile(r"<\d{2,}:\d{2}(?:[.:]\d{1,3})?>|<\d+,\d+,\d+>")
|
_DOC_TAG_RE = re.compile(r"^\[([^:\]\[]+):([^\[\]]*)\]$")
|
||||||
|
|
||||||
# QRC is totally a completely different matter. Since they are still providing standard LRC APIs,
|
# QRC uses a different format and is intentionally out of scope here.
|
||||||
# it might be a good idea to leave this mass to the future :)
|
|
||||||
|
|
||||||
|
|
||||||
def _remove_pattern(text: str, pattern: re.Pattern) -> str:
|
def _remove_pattern(text: str, pattern: re.Pattern) -> str:
|
||||||
@@ -58,170 +51,282 @@ def _raw_tag_to_ms(mm: str, ss: str, frac: Optional[str]) -> int:
|
|||||||
return (int(mm) * 60 + int(ss)) * 1000 + ms
|
return (int(mm) * 60 + int(ss)) * 1000 + ms
|
||||||
|
|
||||||
|
|
||||||
def _raw_tag_to_cs(mm: str, ss: str, frac: Optional[str]) -> str:
|
def _ms_to_std_tag(total_ms: int) -> str:
|
||||||
"""Convert parsed time tag components to standard [mm:ss.cc] string."""
|
mm = max(0, total_ms) // 60000
|
||||||
if frac is None:
|
ss = (max(0, total_ms) % 60000) // 1000
|
||||||
ms = 0
|
cs = min(round((max(0, total_ms) % 1000) / 10), 99)
|
||||||
else:
|
return f"[{mm:02d}:{ss:02d}.{cs:02d}]"
|
||||||
# cc in [mm:ss:cc] is also treated as centiseconds, per LRC spec
|
|
||||||
# ^
|
|
||||||
# why does this format even exist, idk
|
|
||||||
n = len(frac)
|
|
||||||
if n == 1:
|
|
||||||
ms = int(frac) * 100
|
|
||||||
elif n == 2:
|
|
||||||
ms = int(frac) * 10
|
|
||||||
else:
|
|
||||||
ms = int(frac)
|
|
||||||
cs = min(round(ms / 10), 99)
|
|
||||||
return f"[{mm}:{ss}.{cs:02d}]"
|
|
||||||
|
|
||||||
|
|
||||||
def _sanitize_lyric_text(text: str) -> str:
|
def _ms_to_word_tag(total_ms: int) -> str:
|
||||||
"""Remove possibly word-sync time tags in lyric
|
mm = max(0, total_ms) // 60000
|
||||||
|
ss = (max(0, total_ms) % 60000) // 1000
|
||||||
|
cs = min(round((max(0, total_ms) % 1000) / 10), 99)
|
||||||
|
return f"<{mm:02d}:{ss:02d}.{cs:02d}>"
|
||||||
|
|
||||||
Assumes the normal line-sync time tags are already stripped.
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class LrcWordSegment:
|
||||||
|
text: str
|
||||||
|
time_ms: Optional[int] = None
|
||||||
|
duration_ms: Optional[int] = None
|
||||||
|
|
||||||
|
|
||||||
|
class BaseLine(ABC):
|
||||||
|
"""Common line interface for rendering and text extraction."""
|
||||||
|
|
||||||
|
@property
|
||||||
|
@abstractmethod
|
||||||
|
def text(self) -> str:
|
||||||
|
"""Return plain text content for this line."""
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def to_text(self, include_word_sync: bool) -> str:
|
||||||
|
"""Return full serialized line text."""
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def to_plain_unsynced(self) -> Optional[str]:
|
||||||
|
"""Return this line's plain-text contribution in unsynced mode."""
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def timed_plain_entries(self) -> list[tuple[int, str]]:
|
||||||
|
"""Return (timestamp_ms, text) entries for synced plain-mode output."""
|
||||||
|
|
||||||
|
def has_nonzero_timestamp(self) -> bool:
|
||||||
|
return any(ts > 0 for ts, _ in self.timed_plain_entries())
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class DocTagLine(BaseLine):
|
||||||
|
"""Represents a single doc tag line like [ar:Artist]."""
|
||||||
|
|
||||||
|
key: str
|
||||||
|
value: str
|
||||||
|
|
||||||
|
@property
|
||||||
|
def text(self) -> str:
|
||||||
|
return f"[{self.key}:{self.value}]"
|
||||||
|
|
||||||
|
def to_text(self, include_word_sync: bool) -> str:
|
||||||
|
return self.text
|
||||||
|
|
||||||
|
def to_plain_unsynced(self) -> Optional[str]:
|
||||||
|
return None
|
||||||
|
|
||||||
|
def timed_plain_entries(self) -> list[tuple[int, str]]:
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class LyricLine(BaseLine):
|
||||||
|
"""Lyric line with optional line-level timestamps."""
|
||||||
|
|
||||||
|
line_times_ms: list[int] = field(default_factory=list)
|
||||||
|
words: list[LrcWordSegment] = field(default_factory=list)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def text(self) -> str:
|
||||||
|
return "".join(seg.text for seg in self.words)
|
||||||
|
|
||||||
|
def to_text(self, include_word_sync: bool) -> str:
|
||||||
|
prefix = "".join(_ms_to_std_tag(ms) for ms in self.line_times_ms)
|
||||||
|
return prefix + self.text
|
||||||
|
|
||||||
|
def to_plain_unsynced(self) -> Optional[str]:
|
||||||
|
return _remove_pattern(self.text, _LINE_START_TAGS_RE)
|
||||||
|
|
||||||
|
def timed_plain_entries(self) -> list[tuple[int, str]]:
|
||||||
|
return [(tag_ms, self.text) for tag_ms in self.line_times_ms]
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class WordSyncLyricLine(LyricLine):
|
||||||
|
"""Lyric line that can render per-word sync tags when requested."""
|
||||||
|
|
||||||
|
def to_text(self, include_word_sync: bool) -> str:
|
||||||
|
prefix = "".join(_ms_to_std_tag(ms) for ms in self.line_times_ms)
|
||||||
|
if not include_word_sync:
|
||||||
|
return prefix + self.text
|
||||||
|
parts: list[str] = []
|
||||||
|
for seg in self.words:
|
||||||
|
if seg.time_ms is not None:
|
||||||
|
parts.append(_ms_to_word_tag(seg.time_ms))
|
||||||
|
parts.append(seg.text)
|
||||||
|
return prefix + "".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
def _split_trimmed_lines(text: str) -> list[str]:
|
||||||
|
"""Split text into lines, strip each line, and drop outer blank lines."""
|
||||||
|
|
||||||
|
lines = [line.strip() for line in text.splitlines()]
|
||||||
|
while lines and not lines[0].strip():
|
||||||
|
lines.pop(0)
|
||||||
|
while lines and not lines[-1].strip():
|
||||||
|
lines.pop()
|
||||||
|
return lines
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_leading_line_tags(line: str) -> tuple[list[int], str]:
|
||||||
|
"""Parse leading line-sync tags and return (times_ms, lyric_part).
|
||||||
|
|
||||||
|
Spaces between consecutive leading tags are dropped. If non-space text
|
||||||
|
appears, parsing of leading tags stops and the remainder is lyric text.
|
||||||
"""
|
"""
|
||||||
return _remove_pattern(text, _WORD_SYNC_TAG_RE)
|
pos = 0
|
||||||
|
tags_ms: list[int] = []
|
||||||
|
while True:
|
||||||
|
m = _RAW_TAG_RE.match(line, pos)
|
||||||
|
if not m:
|
||||||
|
break
|
||||||
|
tags_ms.append(_raw_tag_to_ms(m.group(1), m.group(2), m.group(3)))
|
||||||
|
pos = m.end()
|
||||||
|
|
||||||
|
# Allow spaces only between consecutive leading tags.
|
||||||
|
# We only check for '[' here; the next loop decides whether it is a valid time tag.
|
||||||
|
scan = pos
|
||||||
|
while scan < len(line) and line[scan].isspace():
|
||||||
|
scan += 1
|
||||||
|
if scan < len(line) and line[scan] == "[":
|
||||||
|
pos = scan
|
||||||
|
continue
|
||||||
|
pos = scan
|
||||||
|
break
|
||||||
|
return tags_ms, line[pos:]
|
||||||
|
|
||||||
|
|
||||||
def _reformat(text: str) -> list[str]:
|
def _parse_word_segments(lyric_part: str) -> tuple[list[LrcWordSegment], bool]:
|
||||||
"""Parse each line and reformat to standard [mm:ss.cc]...content form.
|
"""Parse timed word-sync tags while preserving all lyric text exactly."""
|
||||||
|
segments: list[LrcWordSegment] = []
|
||||||
|
cursor = 0
|
||||||
|
current_time: Optional[int] = None
|
||||||
|
has_word_sync = False
|
||||||
|
|
||||||
Handles any mix of time tag formats on input. Lines with no time tags
|
for m in _WORD_SYNC_TAG_RE.finditer(lyric_part):
|
||||||
are stripped of leading/trailing whitespace and passed through unchanged.
|
piece = lyric_part[cursor : m.start()]
|
||||||
"""
|
if piece:
|
||||||
out: list[str] = []
|
segments.append(LrcWordSegment(text=piece, time_ms=current_time))
|
||||||
for line in text.splitlines():
|
current_time = _raw_tag_to_ms(m.group(1), m.group(2), m.group(3))
|
||||||
line = line.strip()
|
has_word_sync = True
|
||||||
pos = 0
|
cursor = m.end()
|
||||||
tags: list[str] = []
|
|
||||||
while True:
|
|
||||||
while pos < len(line) and line[pos].isspace():
|
|
||||||
pos += 1
|
|
||||||
m = _RAW_TAG_RE.match(line, pos)
|
|
||||||
# Non-time tags are passed through as-is, except for leading/trailing whitespace which is stripped.
|
|
||||||
if not m:
|
|
||||||
# No more tags on this line
|
|
||||||
break
|
|
||||||
tags.append(_raw_tag_to_cs(m.group(1), m.group(2), m.group(3)))
|
|
||||||
pos = m.end()
|
|
||||||
if tags:
|
|
||||||
# This could break lyric lines of some kind of word-synced LRC format, e.g.
|
|
||||||
# [00:01.00]Lyric [00:02.00]line
|
|
||||||
# but such format were not planned to be supported in the first place, so…
|
|
||||||
out.append(_sanitize_lyric_text("".join(tags) + line[pos:]))
|
|
||||||
else:
|
|
||||||
out.append(line)
|
|
||||||
# Empty lines with no tags are also preserved
|
|
||||||
|
|
||||||
# Remove empty lines at the start and end of the whole text, but preserve blank lines in the middle
|
tail = lyric_part[cursor:]
|
||||||
while out and not out[0].strip():
|
if tail or not segments:
|
||||||
out.pop(0)
|
segments.append(
|
||||||
while out and not out[-1].strip():
|
LrcWordSegment(
|
||||||
out.pop()
|
text=tail,
|
||||||
|
time_ms=current_time if has_word_sync else None,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
return segments, has_word_sync
|
||||||
|
|
||||||
return out
|
|
||||||
|
def _is_single_doc_tag_line(line: str) -> Optional[tuple[str, str]]:
|
||||||
|
"""Return (key, value) only for standalone single doc-tag lines."""
|
||||||
|
|
||||||
|
if _RAW_TAG_RE.fullmatch(line):
|
||||||
|
return None
|
||||||
|
m = _DOC_TAG_RE.fullmatch(line)
|
||||||
|
if not m:
|
||||||
|
return None
|
||||||
|
key = m.group(1).strip()
|
||||||
|
value = m.group(2).strip()
|
||||||
|
return key, value
|
||||||
|
|
||||||
|
|
||||||
class LRCData:
|
class LRCData:
|
||||||
_lines: list[str]
|
_lines: list[BaseLine]
|
||||||
|
_doc_tags: dict[str, str]
|
||||||
|
|
||||||
def __init__(self, text: str | None = None) -> None:
|
def __init__(self, text: Optional[str] = None) -> None:
|
||||||
|
self._doc_tags = {}
|
||||||
if not text:
|
if not text:
|
||||||
self._lines = []
|
self._lines = []
|
||||||
return
|
return
|
||||||
self._lines = _reformat(text)
|
|
||||||
self._apply_offset()
|
raw_lines = _split_trimmed_lines(text)
|
||||||
|
parsed: list[BaseLine] = []
|
||||||
|
|
||||||
|
for raw in raw_lines:
|
||||||
|
maybe_tag = _is_single_doc_tag_line(raw)
|
||||||
|
if maybe_tag is not None:
|
||||||
|
key, value = maybe_tag
|
||||||
|
self._doc_tags[key] = value
|
||||||
|
parsed.append(DocTagLine(key=key, value=value))
|
||||||
|
continue
|
||||||
|
|
||||||
|
tags_ms, lyric_part = _extract_leading_line_tags(raw)
|
||||||
|
words, has_word_sync = _parse_word_segments(lyric_part if tags_ms else raw)
|
||||||
|
|
||||||
|
if has_word_sync:
|
||||||
|
parsed.append(WordSyncLyricLine(line_times_ms=tags_ms, words=words))
|
||||||
|
else:
|
||||||
|
parsed.append(LyricLine(line_times_ms=tags_ms, words=words))
|
||||||
|
|
||||||
|
self._lines = parsed
|
||||||
|
|
||||||
def __str__(self) -> str:
|
def __str__(self) -> str:
|
||||||
return "\n".join(self._lines)
|
return self.to_text(plain=False, include_word_sync=False)
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
return f"LRCData(lines={self._lines!r})"
|
return f"LRCData(doc_tags={self._doc_tags!r}, lines={self._lines!r})"
|
||||||
|
|
||||||
def __bool__(self) -> bool:
|
|
||||||
return len(self._lines) > 0
|
|
||||||
|
|
||||||
def __len__(self) -> int:
|
def __len__(self) -> int:
|
||||||
return len(self._lines)
|
return len(self._lines)
|
||||||
|
|
||||||
def _apply_offset(self):
|
@property
|
||||||
"""Parse [offset:±ms] and shift all standard [mm:ss.cc] tags accordingly.
|
def tags(self) -> dict[str, str]:
|
||||||
|
return self._doc_tags
|
||||||
|
|
||||||
Per LRC spec, positive offset = lyrics appear sooner (subtract from timestamps).
|
@property
|
||||||
"""
|
def lines(self) -> list[BaseLine]:
|
||||||
m: Optional[re.Match] = None
|
return self._lines
|
||||||
for i, line in enumerate(self._lines):
|
|
||||||
m = _OFFSET_RE.search(line)
|
|
||||||
if m:
|
|
||||||
self._lines.pop(i)
|
|
||||||
break
|
|
||||||
if not m:
|
|
||||||
return
|
|
||||||
offset_ms = int(m.group(1))
|
|
||||||
if offset_ms == 0:
|
|
||||||
return
|
|
||||||
|
|
||||||
def _shift(match: re.Match) -> str:
|
|
||||||
total_ms = max(
|
|
||||||
0,
|
|
||||||
(int(match.group(1)) * 60 + int(match.group(2))) * 1000
|
|
||||||
+ int(match.group(3)) * 10
|
|
||||||
- offset_ms,
|
|
||||||
)
|
|
||||||
new_mm = total_ms // 60000
|
|
||||||
new_ss = (total_ms % 60000) // 1000
|
|
||||||
new_cs = min(round((total_ms % 1000) / 10), 99)
|
|
||||||
return f"[{new_mm:02d}:{new_ss:02d}.{new_cs:02d}]"
|
|
||||||
|
|
||||||
self._lines = [_STD_TAG_CAPTURE_RE.sub(_shift, line) for line in self._lines]
|
|
||||||
|
|
||||||
def is_synced(self) -> bool:
|
def is_synced(self) -> bool:
|
||||||
"""Check whether text contains non-zero LRC time tags.
|
"""Return True if any lyric line contains a non-zero line timestamp."""
|
||||||
|
return any(line.has_nonzero_timestamp() for line in self._lines)
|
||||||
Assumes text has been normalized by normalize (standard [mm:ss.cc] format).
|
|
||||||
"""
|
|
||||||
for line in self._lines:
|
|
||||||
for m in _STD_TAG_CAPTURE_RE.finditer(line):
|
|
||||||
if m.group(1) != "00" or m.group(2) != "00" or m.group(3) != "00":
|
|
||||||
return True
|
|
||||||
return False
|
|
||||||
|
|
||||||
def detect_sync_status(self) -> CacheStatus:
|
def detect_sync_status(self) -> CacheStatus:
|
||||||
"""Determine whether lyrics contain meaningful LRC time tags.
|
"""Map sync detection result to cache status."""
|
||||||
|
|
||||||
Assumes text has been normalized by normalize.
|
|
||||||
"""
|
|
||||||
return (
|
return (
|
||||||
CacheStatus.SUCCESS_SYNCED
|
CacheStatus.SUCCESS_SYNCED
|
||||||
if self.is_synced()
|
if self.is_synced()
|
||||||
else CacheStatus.SUCCESS_UNSYNCED
|
else CacheStatus.SUCCESS_UNSYNCED
|
||||||
)
|
)
|
||||||
|
|
||||||
def normalize_unsynced(self):
|
def normalize_unsynced(self) -> "LRCData":
|
||||||
"""Normalize unsynced lyrics so every line has a [00:00.00] tag.
|
"""Convert lyrics into unsynced LRC form with [00:00.00] tags.
|
||||||
|
|
||||||
Assumes lyrics have been normalized by normalize.
|
- Leading blank lyric lines are skipped.
|
||||||
- Lines that already have time tags: replace with [00:00.00]
|
- Middle blank lyric lines are preserved as empty synced lines.
|
||||||
- Lines without leading tags: prepend [00:00.00]
|
- Doc-tag lines are preserved unchanged.
|
||||||
- Blank lines in middle are converted to [00:00.00]
|
|
||||||
"""
|
"""
|
||||||
out: list[str] = []
|
out: list[BaseLine] = []
|
||||||
first = True
|
first = True
|
||||||
for i, line in enumerate(self._lines):
|
for line in self._lines:
|
||||||
stripped = line.strip()
|
if isinstance(line, DocTagLine):
|
||||||
|
out.append(DocTagLine(key=line.key, value=line.value))
|
||||||
|
continue
|
||||||
|
|
||||||
|
assert isinstance(line, LyricLine)
|
||||||
|
|
||||||
|
stripped = line.text.strip()
|
||||||
if not stripped and not first:
|
if not stripped and not first:
|
||||||
out.append("[00:00.00]")
|
out.append(
|
||||||
|
LyricLine(line_times_ms=[0], words=[LrcWordSegment(text="")])
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
elif not stripped:
|
elif not stripped:
|
||||||
# Skip leading blank lines
|
|
||||||
continue
|
continue
|
||||||
first = False
|
first = False
|
||||||
cleaned = _remove_pattern(line, _LINE_START_STD_TAGS_RE)
|
out.append(
|
||||||
out.append(f"[00:00.00]{cleaned}")
|
LyricLine(
|
||||||
|
line_times_ms=[0],
|
||||||
|
words=[LrcWordSegment(text=line.text)],
|
||||||
|
)
|
||||||
|
)
|
||||||
ret = LRCData()
|
ret = LRCData()
|
||||||
ret._lines = out
|
ret._lines = out
|
||||||
|
ret._doc_tags = dict(self._doc_tags)
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
def to_plain(
|
def to_plain(
|
||||||
@@ -230,32 +335,22 @@ class LRCData:
|
|||||||
) -> str:
|
) -> str:
|
||||||
"""Convert lyrics to plain text with all tags stripped.
|
"""Convert lyrics to plain text with all tags stripped.
|
||||||
|
|
||||||
If deduplicate is True, only keep the first line of consecutive lines with the same lyric text (after stripping tags).
|
If synced, output is sorted by line timestamp and duplicated for multi-tag lines.
|
||||||
Otherwise, lines with multiple time tags will be duplicated as many times as the number of tags.
|
If not synced, leading bracket tags are stripped per line and original order is kept.
|
||||||
Assumes text has been normalized by normalize.
|
If deduplicate is True, only consecutive duplicate plain lines are collapsed.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if not self.is_synced():
|
if not self.is_synced():
|
||||||
return "\n".join(
|
plain_lines = [
|
||||||
_remove_pattern(line, _LINE_START_TAGS_RE) for line in self._lines
|
text
|
||||||
).strip("\n")
|
for text in (line.to_plain_unsynced() for line in self._lines)
|
||||||
|
if text is not None
|
||||||
|
]
|
||||||
|
return "\n".join(plain_lines).strip("\n")
|
||||||
|
|
||||||
tagged_lines = []
|
tagged_lines: list[tuple[int, str]] = []
|
||||||
for line in self._lines:
|
for line in self._lines:
|
||||||
pos = 0
|
tagged_lines.extend(line.timed_plain_entries())
|
||||||
tag_ms = []
|
|
||||||
while True:
|
|
||||||
# Only match strictly repeated standard time tags at the start of the line
|
|
||||||
# Lines without any time tags are ignored.
|
|
||||||
# Lyric lines are considered already stripped of whitespaces, so no strips here.
|
|
||||||
m = _STD_TAG_CAPTURE_RE.match(line, pos)
|
|
||||||
if not m:
|
|
||||||
lyric = line[pos:]
|
|
||||||
for tag in tag_ms:
|
|
||||||
tagged_lines.append((tag, lyric))
|
|
||||||
break
|
|
||||||
tag_ms.append(_raw_tag_to_ms(m.group(1), m.group(2), m.group(3)))
|
|
||||||
pos = m.end()
|
|
||||||
|
|
||||||
sorted_lines = [lyric for _, lyric in sorted(tagged_lines, key=lambda x: x[0])]
|
sorted_lines = [lyric for _, lyric in sorted(tagged_lines, key=lambda x: x[0])]
|
||||||
|
|
||||||
@@ -271,23 +366,27 @@ class LRCData:
|
|||||||
|
|
||||||
return "\n".join(sorted_lines).strip()
|
return "\n".join(sorted_lines).strip()
|
||||||
|
|
||||||
def to_unsynced(self):
|
def to_unsynced(self) -> "LRCData":
|
||||||
|
"""Return a plain-text based unsynced representation."""
|
||||||
return LRCData(self.to_plain())
|
return LRCData(self.to_plain())
|
||||||
|
|
||||||
def to_lrc(
|
def to_text(
|
||||||
self,
|
self,
|
||||||
plain: bool = False,
|
plain: bool = False,
|
||||||
|
include_word_sync: bool = False,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Return lyrics, optionally stripping tags.
|
"""Serialize to LRC text or plain text.
|
||||||
|
|
||||||
Assumes text has been normalized by normalize.
|
- plain=True returns to_plain().
|
||||||
|
- include_word_sync controls rendering of per-word tags for word-sync lines.
|
||||||
"""
|
"""
|
||||||
ret = self
|
|
||||||
if not self.is_synced():
|
|
||||||
ret = self.normalize_unsynced()
|
|
||||||
if plain:
|
if plain:
|
||||||
return ret.to_plain()
|
return self.to_plain(deduplicate=False)
|
||||||
return "\n".join(ret._lines)
|
|
||||||
|
lines: list[str] = [
|
||||||
|
line.to_text(include_word_sync=include_word_sync) for line in self._lines
|
||||||
|
]
|
||||||
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
def get_audio_path(audio_url: str, ensure_exists: bool = False) -> Optional[Path]:
|
def get_audio_path(audio_url: str, ensure_exists: bool = False) -> Optional[Path]:
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ def is_better_result(
|
|||||||
*,
|
*,
|
||||||
allow_unsynced: bool,
|
allow_unsynced: bool,
|
||||||
) -> bool:
|
) -> bool:
|
||||||
"""Return True when *new* should rank above *old*.
|
"""Return True when new should rank above old.
|
||||||
|
|
||||||
Ordering rules (highest first):
|
Ordering rules (highest first):
|
||||||
1) Positive statuses always beat negative statuses.
|
1) Positive statuses always beat negative statuses.
|
||||||
|
|||||||
@@ -80,7 +80,7 @@ def test_cache_search_fetcher_with_fuzzy_metadata(
|
|||||||
|
|
||||||
assert result is not None
|
assert result is not None
|
||||||
assert result.lyrics is not None
|
assert result.lyrics is not None
|
||||||
assert result.lyrics.to_lrc() == expected_lrc
|
assert result.lyrics.to_text() == expected_lrc
|
||||||
|
|
||||||
|
|
||||||
def test_cache_search_fetcher_prefer_better_match(lrc_manager: LrcManager):
|
def test_cache_search_fetcher_prefer_better_match(lrc_manager: LrcManager):
|
||||||
@@ -97,7 +97,7 @@ def test_cache_search_fetcher_prefer_better_match(lrc_manager: LrcManager):
|
|||||||
|
|
||||||
assert result is not None
|
assert result is not None
|
||||||
assert result.lyrics is not None
|
assert result.lyrics is not None
|
||||||
assert result.lyrics.to_lrc() == "[00:00.01]artist modified"
|
assert result.lyrics.to_text() == "[00:00.01]artist modified"
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.network
|
@pytest.mark.network
|
||||||
|
|||||||
+139
-32
@@ -1,6 +1,11 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from lrx_cli.lrc import LRCData
|
from lrx_cli.lrc import (
|
||||||
|
LRCData,
|
||||||
|
DocTagLine,
|
||||||
|
LyricLine,
|
||||||
|
WordSyncLyricLine,
|
||||||
|
)
|
||||||
from lrx_cli.models import CacheStatus
|
from lrx_cli.models import CacheStatus
|
||||||
|
|
||||||
|
|
||||||
@@ -8,7 +13,7 @@ def _normalize(text: str) -> str:
|
|||||||
return str(LRCData(text))
|
return str(LRCData(text))
|
||||||
|
|
||||||
|
|
||||||
def test_normalize_tags_supports_all_raw_time_formats() -> None:
|
def test_time_tag_formats_are_normalized() -> None:
|
||||||
raw = "\n".join(
|
raw = "\n".join(
|
||||||
[
|
[
|
||||||
"[00:01]a",
|
"[00:01]a",
|
||||||
@@ -32,37 +37,27 @@ def test_normalize_tags_supports_all_raw_time_formats() -> None:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_normalize_tags_keeps_non_timed_lines_trimmed_and_unchanged() -> None:
|
def test_non_timed_lines_are_kept_as_lyrics() -> None:
|
||||||
raw = " plain line \n\n [ar:Meta Header] "
|
raw = " plain line \n\n other line "
|
||||||
|
|
||||||
normalized = _normalize(raw)
|
normalized = _normalize(raw)
|
||||||
|
|
||||||
assert normalized == "plain line\n\n[ar:Meta Header]"
|
assert normalized == "plain line\n\nother line"
|
||||||
|
|
||||||
|
|
||||||
def test_normalize_tags_removes_word_sync_patterns() -> None:
|
def test_word_sync_tags_are_parsed_and_export_controlled() -> None:
|
||||||
raw = (
|
raw = "[00:01.00]<00:01>he <00:01.50>llo\n[00:02.00]plain"
|
||||||
"[00:01.00]<00:01>hello\n"
|
|
||||||
"[00:02.00]<00:02.3>world\n"
|
|
||||||
"[00:03.00]<00:03.45>foo\n"
|
|
||||||
"[00:04.00]<00:04:678>bar\n"
|
|
||||||
"[00:05.00]<1,2,3>baz"
|
|
||||||
)
|
|
||||||
|
|
||||||
normalized = _normalize(raw)
|
data = LRCData(raw)
|
||||||
|
|
||||||
assert normalized == "\n".join(
|
assert data.to_text(include_word_sync=False) == "[00:01.00]he llo\n[00:02.00]plain"
|
||||||
[
|
assert (
|
||||||
"[00:01.00]hello",
|
data.to_text(include_word_sync=True)
|
||||||
"[00:02.00]world",
|
== "[00:01.00]<00:01.00>he <00:01.50>llo\n[00:02.00]plain"
|
||||||
"[00:03.00]foo",
|
|
||||||
"[00:04.00]bar",
|
|
||||||
"[00:05.00]baz",
|
|
||||||
]
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_normalize_tags_keeps_midline_timestamps_as_is() -> None:
|
def test_midline_line_tags_are_kept_as_plain_text() -> None:
|
||||||
raw = "[00:01.00]Lyric [00:02.00]line"
|
raw = "[00:01.00]Lyric [00:02.00]line"
|
||||||
|
|
||||||
normalized = _normalize(raw)
|
normalized = _normalize(raw)
|
||||||
@@ -74,11 +69,11 @@ def test_normalize_tags_applies_positive_and_negative_offset_per_spec() -> None:
|
|||||||
positive = _normalize("[offset:+1000]\n[00:10.00]line")
|
positive = _normalize("[offset:+1000]\n[00:10.00]line")
|
||||||
negative = _normalize("[offset:-500]\n[00:10.00]line")
|
negative = _normalize("[offset:-500]\n[00:10.00]line")
|
||||||
|
|
||||||
assert positive == "[00:09.00]line"
|
assert positive == "[offset:+1000]\n[00:10.00]line"
|
||||||
assert negative == "[00:10.50]line"
|
assert negative == "[offset:-500]\n[00:10.00]line"
|
||||||
|
|
||||||
|
|
||||||
def test_normalize_tags_accepts_leading_spaces_and_tabs_before_tags() -> None:
|
def test_leading_spaces_before_first_time_tag_are_trimmed() -> None:
|
||||||
raw = "\t [00:01.2] hello"
|
raw = "\t [00:01.2] hello"
|
||||||
|
|
||||||
normalized = _normalize(raw)
|
normalized = _normalize(raw)
|
||||||
@@ -89,12 +84,14 @@ def test_normalize_tags_accepts_leading_spaces_and_tabs_before_tags() -> None:
|
|||||||
def test_normalize_tags_handles_consecutive_start_tags_with_spaces_between() -> None:
|
def test_normalize_tags_handles_consecutive_start_tags_with_spaces_between() -> None:
|
||||||
raw = "[00:01] [00:02.3] chorus"
|
raw = "[00:01] [00:02.3] chorus"
|
||||||
|
|
||||||
normalized = _normalize(raw)
|
data = LRCData(raw)
|
||||||
|
assert len(data.lines) == 1
|
||||||
assert normalized == "[00:01.00][00:02.30]chorus"
|
assert isinstance(data.lines[0], LyricLine)
|
||||||
|
assert data.lines[0].line_times_ms == [1000, 2300]
|
||||||
|
assert data.lines[0].text == "chorus"
|
||||||
|
|
||||||
|
|
||||||
def test_normalize_tags_preserves_non_leading_raw_like_tags() -> None:
|
def test_non_leading_time_like_text_is_plain_lyric() -> None:
|
||||||
raw = "intro [00:01]line"
|
raw = "intro [00:01]line"
|
||||||
|
|
||||||
normalized = _normalize(raw)
|
normalized = _normalize(raw)
|
||||||
@@ -107,7 +104,7 @@ def test_normalize_tags_removes_offset_tag_line_even_without_lyrics() -> None:
|
|||||||
|
|
||||||
normalized = _normalize(raw)
|
normalized = _normalize(raw)
|
||||||
|
|
||||||
assert normalized == ""
|
assert normalized == "[offset:+500]"
|
||||||
|
|
||||||
|
|
||||||
def test_is_synced_and_detect_sync_status_follow_non_zero_rule() -> None:
|
def test_is_synced_and_detect_sync_status_follow_non_zero_rule() -> None:
|
||||||
@@ -140,7 +137,7 @@ def test_normalize_unsynced_covers_documented_blank_and_tag_rules() -> None:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_to_plain_duplicates_lines_by_leading_repeated_timestamps() -> None:
|
def test_to_plain_duplicates_lines_for_multi_line_times() -> None:
|
||||||
text = "\n".join(
|
text = "\n".join(
|
||||||
[
|
[
|
||||||
"[00:02.00][00:01.00]hello",
|
"[00:02.00][00:01.00]hello",
|
||||||
@@ -210,3 +207,113 @@ def test_reformat_pipeline_trims_outer_blanks_and_preserves_inner_blanks() -> No
|
|||||||
normalized = str(LRCData(text))
|
normalized = str(LRCData(text))
|
||||||
|
|
||||||
assert normalized == "[00:01.00]a\n\n[00:02.00]b"
|
assert normalized == "[00:01.00]a\n\n[00:02.00]b"
|
||||||
|
|
||||||
|
|
||||||
|
def test_single_doc_tag_line_is_not_added_to_lines() -> None:
|
||||||
|
data = LRCData("[ar:Artist]\n[00:01.00]line")
|
||||||
|
|
||||||
|
assert data.tags == {"ar": "Artist"}
|
||||||
|
assert len(data.lines) == 2
|
||||||
|
assert isinstance(data.lines[0], DocTagLine)
|
||||||
|
assert isinstance(data.lines[1], LyricLine)
|
||||||
|
assert data.lines[1].text == "line"
|
||||||
|
|
||||||
|
|
||||||
|
def test_multiple_doc_tags_on_one_line_are_plain_lyrics() -> None:
|
||||||
|
data = LRCData("[ar:Artist][ti:Song]")
|
||||||
|
|
||||||
|
assert data.tags == {}
|
||||||
|
assert len(data.lines) == 1
|
||||||
|
assert data.lines[0].text == "[ar:Artist][ti:Song]"
|
||||||
|
|
||||||
|
|
||||||
|
def test_doc_tag_after_lyrics_is_treated_as_lyrics() -> None:
|
||||||
|
data = LRCData("[00:01.00]line\n[ar:Artist]")
|
||||||
|
|
||||||
|
assert data.tags == {"ar": "Artist"}
|
||||||
|
assert len(data.lines) == 2
|
||||||
|
assert isinstance(data.lines[1], DocTagLine)
|
||||||
|
assert data.lines[1].text == "[ar:Artist]"
|
||||||
|
|
||||||
|
|
||||||
|
def test_unknown_lines_before_lyrics_are_preserved_and_do_not_start_lyrics() -> None:
|
||||||
|
data = LRCData("comment line\n[ar:Artist]\n[00:01.00]line")
|
||||||
|
|
||||||
|
assert data.tags == {"ar": "Artist"}
|
||||||
|
assert len(data.lines) == 3
|
||||||
|
assert isinstance(data.lines[0], LyricLine)
|
||||||
|
assert isinstance(data.lines[1], DocTagLine)
|
||||||
|
assert data.lines[2].text == "line"
|
||||||
|
assert str(data).startswith("comment line\n[ar:Artist]\n")
|
||||||
|
|
||||||
|
|
||||||
|
def test_to_plain_excludes_doc_tags_but_keeps_lyrics() -> None:
|
||||||
|
data = LRCData("[ar:Artist]\n[00:01.00]line\n[ti:Song]\nplain")
|
||||||
|
|
||||||
|
assert data.to_plain() == "line"
|
||||||
|
|
||||||
|
|
||||||
|
def test_non_space_between_line_tags_stops_tag_parsing() -> None:
|
||||||
|
data = LRCData("[00:01.00]x[00:02.00]tail")
|
||||||
|
|
||||||
|
assert len(data.lines) == 1
|
||||||
|
assert isinstance(data.lines[0], LyricLine)
|
||||||
|
assert data.lines[0].line_times_ms == [1000]
|
||||||
|
assert data.lines[0].text == "x[00:02.00]tail"
|
||||||
|
|
||||||
|
|
||||||
|
def test_line_only_time_tag_is_valid_empty_lyric() -> None:
|
||||||
|
data = LRCData("[00:01.00]")
|
||||||
|
|
||||||
|
assert len(data.lines) == 1
|
||||||
|
assert isinstance(data.lines[0], LyricLine)
|
||||||
|
assert data.lines[0].line_times_ms == [1000]
|
||||||
|
assert data.lines[0].text == ""
|
||||||
|
|
||||||
|
|
||||||
|
def test_model_uses_subclass_for_word_sync_lines() -> None:
|
||||||
|
a = LRCData("[00:01.00]<00:00.50>lyric")
|
||||||
|
b = LRCData("[00:01.00]lyric")
|
||||||
|
|
||||||
|
assert isinstance(a.lines[0], WordSyncLyricLine)
|
||||||
|
assert isinstance(b.lines[0], LyricLine)
|
||||||
|
assert not isinstance(b.lines[0], WordSyncLyricLine)
|
||||||
|
|
||||||
|
|
||||||
|
def test_word_sync_line_with_empty_tail_keeps_word_tag_only_when_enabled() -> None:
|
||||||
|
data = LRCData("[00:01.00]<00:02.00>")
|
||||||
|
|
||||||
|
assert isinstance(data.lines[0], WordSyncLyricLine)
|
||||||
|
assert data.to_text(include_word_sync=False) == "[00:01.00]"
|
||||||
|
assert data.to_text(include_word_sync=True) == "[00:01.00]<00:02.00>"
|
||||||
|
|
||||||
|
|
||||||
|
def test_to_text_plain_true_matches_to_plain_output() -> None:
|
||||||
|
data = LRCData("[00:02.00]b\n[00:01.00]a")
|
||||||
|
|
||||||
|
assert data.to_text(plain=True) == data.to_plain()
|
||||||
|
|
||||||
|
|
||||||
|
def test_to_unsynced_converts_to_plain_based_unsynced_data() -> None:
|
||||||
|
data = LRCData("[ar:Artist]\n[00:02.00]b\n[00:01.00]a")
|
||||||
|
|
||||||
|
unsynced = data.to_unsynced()
|
||||||
|
|
||||||
|
assert isinstance(unsynced, LRCData)
|
||||||
|
assert str(unsynced) == "a\nb"
|
||||||
|
|
||||||
|
|
||||||
|
def test_duplicate_doc_tag_key_last_value_wins_but_lines_are_kept() -> None:
|
||||||
|
data = LRCData("[ar:First]\n[ar:Second]\n[00:01.00]line")
|
||||||
|
|
||||||
|
assert data.tags == {"ar": "Second"}
|
||||||
|
assert len(data.lines) == 3
|
||||||
|
assert isinstance(data.lines[0], DocTagLine)
|
||||||
|
assert isinstance(data.lines[1], DocTagLine)
|
||||||
|
assert str(data).startswith("[ar:First]\n[ar:Second]\n")
|
||||||
|
|
||||||
|
|
||||||
|
def test_to_plain_for_doc_only_text_is_empty() -> None:
|
||||||
|
data = LRCData("[ar:Artist]\n[ti:Song]")
|
||||||
|
|
||||||
|
assert data.to_plain() == ""
|
||||||
|
|||||||
Reference in New Issue
Block a user