942615348d
resolve conflicts
179 lines
5.9 KiB
Python
179 lines
5.9 KiB
Python
"""
|
|
Author: Uyanide pywang0608@foxmail.com
|
|
Date: 2026-03-25 21:54:01
|
|
Description: Shared LRC time-tag utilities (definitely overengineered)
|
|
"""
|
|
|
|
import re
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
from urllib.parse import unquote
|
|
|
|
from .models import CacheStatus
|
|
|
|
# Loose matcher for every accepted input spelling of a time tag:
#   [mm:ss], [mm:ss.c], [mm:ss.cc], [mm:ss.ccc], [mm:ss:cc], ...
# Groups: (minutes, seconds, optional fraction of 1-3 digits).
_RAW_TAG_RE = re.compile(r"\[(\d{2,}):(\d{2})(?:[.:](\d{1,3}))?\]")

# Canonical tag produced by normalization: [mm:ss.cc] (no capture groups).
_STD_TAG_RE = re.compile(r"\[\d{2,}:\d{2}\.\d{2}\]")

# Same canonical tag, capturing (minutes, seconds, centiseconds).
_STD_TAG_CAPTURE_RE = re.compile(r"\[(\d{2,}):(\d{2})\.(\d{2})\]")

# Canonical tag anchored at the beginning of a line.
_LRC_LINE_RE = re.compile(r"^\[\d{2,}:\d{2}\.\d{2}\]", flags=re.MULTILINE)

# [offset:+/-xxx] metadata line; the captured value is in milliseconds.
_OFFSET_RE = re.compile(
    r"^\[offset:\s*([+-]?\d+)\]\s*$",
    flags=re.IGNORECASE | re.MULTILINE,
)
|
|
|
|
|
|
def _raw_tag_to_cs(mm: str, ss: str, frac: Optional[str]) -> str:
    """Convert parsed time tag components to a standard ``[mm:ss.cc]`` string.

    Args:
        mm: Minutes digits exactly as captured from the tag.
        ss: Seconds digits exactly as captured from the tag.
        frac: Fractional digits, or ``None`` when the tag had no fraction.
            One digit means tenths, two digits hundredths, three digits
            thousandths of a second; anything past the third digit is
            below millisecond precision and is ignored.

    Returns:
        The tag rewritten with a two-digit centisecond field. ``mm`` and
        ``ss`` are emitted unchanged; the centiseconds are clamped to 99
        rather than carried into the seconds field.
    """
    if not frac:
        ms = 0
    else:
        # cc in [mm:ss:cc] is also treated as centiseconds, per LRC spec.
        # Right-padding to three digits scales tenths/hundredths to
        # milliseconds ("5" -> 500, "05" -> 50, "005" -> 5).
        ms = int(frac[:3].ljust(3, "0"))
    # Integer half-up rounding. round() rounds half to even, which maps
    # .005 -> .00 but .015 -> .02 and makes evenly spaced inputs uneven.
    cs = min((ms + 5) // 10, 99)
    return f"[{mm}:{ss}.{cs:02d}]"
|
|
|
|
|
|
def _reformat(text: str) -> str:
    """Rewrite every line as ``[mm:ss.cc]...content``.

    Any mix of accepted time tag spellings may lead a line; each leading tag
    is converted to the standard form and the tags are emitted back to back,
    followed by the remaining content with its leading whitespace removed.
    Lines without a leading time tag are only stripped of surrounding
    whitespace, so non-time tags and empty lines are preserved.
    """
    normalized: list[str] = []
    for raw_line in text.splitlines():
        body = raw_line.strip()
        cursor = 0
        std_tags: list[str] = []
        while True:
            # Only plain spaces are tolerated between consecutive tags.
            while body.startswith(" ", cursor):
                cursor += 1
            found = _RAW_TAG_RE.match(body, cursor)
            if found is None:
                # No further time tag at this position.
                break
            minutes, seconds, fraction = found.groups()
            std_tags.append(_raw_tag_to_cs(minutes, seconds, fraction))
            cursor = found.end()
        if not std_tags:
            normalized.append(body)
            continue
        # This could break lyric lines of some word-synced LRC formats, but
        # such formats were never planned to be supported.
        remainder = body[cursor:].lstrip()
        normalized.append("".join(std_tags) + remainder)
    return "\n".join(normalized)
|
|
|
|
|
|
def _apply_offset(text: str) -> str:
    """Parse ``[offset:±ms]`` and shift all standard ``[mm:ss.cc]`` tags.

    Per LRC spec, positive offset = lyrics appear sooner (subtract from
    timestamps). The value of the first offset tag found is used; every
    offset tag line is removed from the result, even when the offset is 0.
    Shifted timestamps never go below ``[00:00.00]``.

    Args:
        text: Lyrics whose time tags are already in standard form.

    Returns:
        ``text`` unchanged when it has no offset tag; otherwise the text
        without offset tags and with every time tag shifted.
    """
    found = _OFFSET_RE.search(text)
    if found is None:
        return text
    offset_ms = int(found.group(1))
    text = _OFFSET_RE.sub("", text).strip("\n")
    if offset_ms == 0:
        return text

    def _shift(match: re.Match) -> str:
        minutes, seconds, centis = (int(part) for part in match.groups())
        shifted_ms = max(
            0, (minutes * 60 + seconds) * 1000 + centis * 10 - offset_ms
        )
        # Round the whole timestamp to centiseconds (half up) before
        # splitting it, so 996-999 ms carries into the next second instead
        # of being clamped to .99.
        total_cs = (shifted_ms + 5) // 10
        new_mm, remainder_cs = divmod(total_cs, 6000)
        new_ss, new_cs = divmod(remainder_cs, 100)
        return f"[{new_mm:02d}:{new_ss:02d}.{new_cs:02d}]"

    return _STD_TAG_CAPTURE_RE.sub(_shift, text)
|
|
|
|
|
|
def normalize_tags(text: str) -> str:
    """Bring LRC text to standard form.

    First every time tag is rewritten as ``[mm:ss.cc]``, then the offset tag
    (if any) is applied to the rewritten tags.
    """
    reformatted = _reformat(text)
    return _apply_offset(reformatted)
|
|
|
|
|
|
def is_synced(text: str) -> bool:
    """Report whether at least one time tag differs from ``[00:00.00]``.

    Assumes text has been normalized by normalize_tags, i.e. every time tag
    is in the standard ``[mm:ss.cc]`` format. Text without any tag is not
    considered synced.
    """
    for tag in _STD_TAG_RE.findall(text):
        if tag != "[00:00.00]":
            return True
    return False
|
|
|
|
|
|
def detect_sync_status(text: str) -> CacheStatus:
    """Classify lyrics as synced or unsynced.

    Assumes text has been normalized by normalize_tags. Returns
    ``CacheStatus.SUCCESS_SYNCED`` when ``is_synced`` finds a meaningful
    time tag and ``CacheStatus.SUCCESS_UNSYNCED`` otherwise.
    """
    if is_synced(text):
        return CacheStatus.SUCCESS_SYNCED
    return CacheStatus.SUCCESS_UNSYNCED
|
|
|
|
|
|
def normalize_unsynced(lyrics: str) -> str:
    """Give every line of unsynced lyrics exactly one ``[00:00.00]`` tag.

    - Leading standard time tags on a line are dropped and replaced by a
      single ``[00:00.00]``.
    - Lines without time tags get ``[00:00.00]`` prepended.
    - Blank lines become a bare ``[00:00.00]``.
    """
    zero_tag = "[00:00.00]"
    result: list[str] = []
    for raw_line in lyrics.splitlines():
        content = raw_line.strip()
        # Peel off leading standard tags one at a time; a blank line has
        # none and falls straight through as an empty payload.
        leading = _LRC_LINE_RE.match(content)
        while leading is not None:
            content = content[leading.end():]
            leading = _LRC_LINE_RE.match(content)
        result.append(zero_tag + content)
    return "\n".join(result)
|
|
|
|
|
|
def get_audio_path(audio_url: str, ensure_exists: bool = False) -> Optional[Path]:
    """Convert a ``file://`` URL to a ``Path``.

    The ``file://`` prefix is removed and the remainder is percent-decoded;
    no authority component (e.g. ``file://localhost/...``) is interpreted.

    Args:
        audio_url: URL to convert.
        ensure_exists: When True, also require the path to exist on disk.

    Returns:
        The decoded path, or ``None`` when the URL does not start with
        ``file://``, carries no path at all, or (with ``ensure_exists``)
        points at something that does not exist.
    """
    prefix = "file://"
    if not audio_url.startswith(prefix):
        return None
    file_path = unquote(audio_url[len(prefix):])
    if not file_path:
        # Path("") silently becomes Path("."), which would report the
        # current directory as the audio file and pass the existence check.
        return None
    path = Path(file_path)
    if ensure_exists and not path.exists():
        return None
    return path
|
|
|
|
|
|
def get_sidecar_path(
    audio_url: str, ensure_audio_exists: bool = False, ensure_exists: bool = False
) -> Optional[Path]:
    """Given a file:// URL, return the corresponding .lrc sidecar path.

    The sidecar is the audio path with its suffix replaced by ``.lrc``.

    Args:
        audio_url: ``file://`` URL of the audio file.
        ensure_audio_exists: When True, return None if the audio file does
            not exist.
        ensure_exists: When True, return None if the .lrc file does not
            exist.

    Returns:
        The sidecar path, or ``None`` when the URL cannot be converted to an
        audio path, the path has no file name, or a requested existence
        check fails.
    """
    audio_path = get_audio_path(audio_url, ensure_exists=ensure_audio_exists)
    if audio_path is None or not audio_path.name:
        # with_suffix() raises ValueError for a path with an empty name
        # (e.g. "/" or "."), so treat such URLs as invalid instead.
        return None
    lrc_path = audio_path.with_suffix(".lrc")
    if ensure_exists and not lrc_path.exists():
        return None
    return lrc_path
|