""" Author: Uyanide pywang0608@foxmail.com Date: 2026-03-25 21:54:01 Description: Shared LRC time-tag utilities """ import re from pathlib import Path from typing import Optional from urllib.parse import unquote from .models import CacheStatus # Standard format: [mm:ss.cc] or [mm:ss.ccc] _STANDARD_TAG_RE = re.compile(r"\[\d{2}:\d{2}\.\d{2,3}\]") # Non-standard format: [mm:ss:cc] (two colons instead of dot) _COLON_TAG_RE = re.compile(r"\[(\d{2}:\d{2}):(\d{2,3})\]") # Matches any LRC time tag (standard or non-standard) at start of line LRC_LINE_RE = re.compile(r"^\[(\d{2}:\d{2}[.:]\d{2,3})\]", re.MULTILINE) # All-zero tags _ZERO_TAG_RE = re.compile(r"^\[00:00[.:]0{2,3}\]$") # [offset:+/-xxx] tag — value in milliseconds _OFFSET_RE = re.compile(r"^\[offset:\s*([+-]?\d+)\]\s*$", re.MULTILINE | re.IGNORECASE) # Time tag for offset application: captures mm, ss, cc/ccc _TIME_TAG_RE = re.compile(r"\[(\d{2}):(\d{2})\.(\d{2,3})\]") def _apply_offset(text: str) -> str: """Parse [offset:±ms] tag and shift all time tags accordingly. Per LRC spec, a positive offset means lyrics appear sooner (subtract from timestamps), negative means later (add to timestamps). """ m = _OFFSET_RE.search(text) if not m: return text offset_ms = int(m.group(1)) if offset_ms == 0: return _OFFSET_RE.sub("", text).strip("\n") # Remove the offset tag line text = _OFFSET_RE.sub("", text) def _shift(match: re.Match) -> str: mm, ss, cs = int(match.group(1)), int(match.group(2)), match.group(3) # Normalize centiseconds to milliseconds if len(cs) == 2: ms = int(cs) * 10 fmt_cs = 2 else: ms = int(cs) fmt_cs = 3 total_ms = (mm * 60 + ss) * 1000 + ms - offset_ms total_ms = max(0, total_ms) new_mm = total_ms // 60000 new_ss = (total_ms % 60000) // 1000 new_cs = total_ms % 1000 if fmt_cs == 2: new_cs = new_cs // 10 return f"[{new_mm:02d}:{new_ss:02d}.{new_cs:02d}]" return f"[{new_mm:02d}:{new_ss:02d}.{new_cs:03d}]" return _TIME_TAG_RE.sub(_shift, text) def normalize_tags(text: str) -> str: """Normalize LRC time tags: colon format → dot format, then apply offset.""" text = _COLON_TAG_RE.sub(r"[\1.\2]", text) return _apply_offset(text) def is_synced(text: str) -> bool: """Check whether text contains actual LRC time tags with non-zero times. Returns False if no tags exist or all tags are [00:00.00]. Handles both [mm:ss.cc] and [mm:ss:cc] formats. """ tags = _STANDARD_TAG_RE.findall(text) # Also check non-standard format tags += [f"[{m.group(1)}.{m.group(2)}]" for m in _COLON_TAG_RE.finditer(text)] if not tags: return False for tag in tags: if not _ZERO_TAG_RE.match(tag): return True return False def detect_sync_status(text: str) -> CacheStatus: """Determine whether lyrics contain meaningful LRC time tags.""" return ( CacheStatus.SUCCESS_SYNCED if is_synced(text) else CacheStatus.SUCCESS_UNSYNCED ) def get_audio_path(audio_url: str, ensure_exists: bool = False) -> Optional[Path]: """Convert file:// URL to Path, return None if invalid or (if ensure_exists) file doesn't exist.""" if not audio_url.startswith("file://"): return None file_path = unquote(audio_url.replace("file://", "", 1)) path = Path(file_path) if ensure_exists and not path.exists(): return None return path def get_sidecar_path( audio_url: str, ensure_audio_exists: bool = False, ensure_exists: bool = False ) -> Optional[Path]: """Given a file:// URL, return the corresponding .lrc sidecar path. If ensure_audio_exists is True, return None if the audio file does not exist. If ensure_exists is True, return None if the .lrc file does not exist. """ audio_path = get_audio_path(audio_url, ensure_exists=ensure_audio_exists) if not audio_path: return None lrc_path = audio_path.with_suffix(".lrc") if ensure_exists and not lrc_path.exists(): return None return lrc_path