finish renaming

2026-03-31 22:34:15 +02:00
parent 4d48e21533
commit 88cf0115af
56 changed files with 35 additions and 35 deletions
@@ -0,0 +1,178 @@
+"""
+Author: Uyanide pywang0608@foxmail.com
+Date: 2026-03-25 21:54:01
+Description: Shared LRC time-tag utilities (definitely overengineered)
+"""
+
+import re
+from pathlib import Path
+from typing import Optional
+from urllib.parse import unquote
+
+from .models import CacheStatus
+
+# Parses any time tag input format:
+#   [mm:ss], [mm:ss.c], [mm:ss.cc], [mm:ss.ccc], [mm:ss:cc], …
+_RAW_TAG_RE = re.compile(r"\[(\d{2,}):(\d{2})(?:[.:](\d{1,3}))?\]")
+
+# Standard format after normalization: [mm:ss.cc]
+_STD_TAG_RE = re.compile(r"\[\d{2,}:\d{2}\.\d{2}\]")
+
+# Standard format with capture groups
+_STD_TAG_CAPTURE_RE = re.compile(r"\[(\d{2,}):(\d{2})\.(\d{2})\]")
+
+# Matches a standard time tag at the start of a line
+_LRC_LINE_RE = re.compile(r"^\[\d{2,}:\d{2}\.\d{2}\]", re.MULTILINE)
+
+# [offset:+/-xxx] tag — value in milliseconds
+_OFFSET_RE = re.compile(r"^\[offset:\s*([+-]?\d+)\]\s*$", re.MULTILINE | re.IGNORECASE)
+
+
+def _raw_tag_to_cs(mm: str, ss: str, frac: Optional[str]) -> str:
+    """Convert parsed time tag components to standard [mm:ss.cc] string."""
+    if frac is None:
+        ms = 0
+    else:
+        # cc in [mm:ss:cc] is also treated as centiseconds, per LRC spec
+        #             ^
+        # why does this format even exist, idk
+        n = len(frac)
+        if n == 1:
+            ms = int(frac) * 100
+        elif n == 2:
+            ms = int(frac) * 10
+        else:
+            ms = int(frac)
+    cs = min(round(ms / 10), 99)
+    return f"[{mm}:{ss}.{cs:02d}]"
+
+
+def _reformat(text: str) -> str:
+    """Parse each line and reformat to standard [mm:ss.cc]...content form.
+
+    Handles any mix of time tag formats on input. Lines with no time tags
+    are stripped of leading/trailing whitespace and passed through unchanged.
+    """
+    out: list[str] = []
+    for line in text.splitlines():
+        line = line.strip()
+        pos = 0
+        tags: list[str] = []
+        while True:
+            while pos < len(line) and line[pos] == " ":
+                pos += 1
+            m = _RAW_TAG_RE.match(line, pos)
+            # Non-time tags are passed through as-is, except for leading/trailing whitespace which is stripped.
+            if not m:
+                # No more tags on this line
+                break
+            tags.append(_raw_tag_to_cs(m.group(1), m.group(2), m.group(3)))
+            pos = m.end()
+        if tags:
+            # This could break lyric lines of some kind of word-synced LRC format,
+            # but such format were not planned to be supported in the first place, so…
+            out.append("".join(tags) + line[pos:].lstrip())
+        else:
+            out.append(line)
+            # Empty lines with no tags are also preserved
+    return "\n".join(out)
+
+
+def _apply_offset(text: str) -> str:
+    """Parse [offset:±ms] and shift all standard [mm:ss.cc] tags accordingly.
+
+    Per LRC spec, positive offset = lyrics appear sooner (subtract from timestamps).
+    """
+    m = _OFFSET_RE.search(text)
+    if not m:
+        return text
+    offset_ms = int(m.group(1))
+    text = _OFFSET_RE.sub("", text).strip("\n")
+    if offset_ms == 0:
+        return text
+
+    def _shift(match: re.Match) -> str:
+        total_ms = max(
+            0,
+            (int(match.group(1)) * 60 + int(match.group(2))) * 1000
+            + int(match.group(3)) * 10
+            - offset_ms,
+        )
+        new_mm = total_ms // 60000
+        new_ss = (total_ms % 60000) // 1000
+        new_cs = min(round((total_ms % 1000) / 10), 99)
+        return f"[{new_mm:02d}:{new_ss:02d}.{new_cs:02d}]"
+
+    return _STD_TAG_CAPTURE_RE.sub(_shift, text)
+
+
+def normalize_tags(text: str) -> str:
+    """Normalize LRC to standard form: reformat all tags to [mm:ss.cc], then apply offset."""
+    return _apply_offset(_reformat(text))
+
+
+def is_synced(text: str) -> bool:
+    """Check whether text contains non-zero LRC time tags.
+
+    Assumes text has been normalized by normalize_tags (standard [mm:ss.cc] format).
+    """
+    tags = _STD_TAG_RE.findall(text)
+    return bool(tags) and any(tag != "[00:00.00]" for tag in tags)
+
+
+def detect_sync_status(text: str) -> CacheStatus:
+    """Determine whether lyrics contain meaningful LRC time tags.
+
+    Assumes text has been normalized by normalize_tags.
+    """
+    return (
+        CacheStatus.SUCCESS_SYNCED if is_synced(text) else CacheStatus.SUCCESS_UNSYNCED
+    )
+
+
+def normalize_unsynced(lyrics: str) -> str:
+    """Normalize unsynced lyrics so every line has a [00:00.00] tag.
+
+    - Lines that already have time tags: replace with [00:00.00]
+    - Lines without time tags: prepend [00:00.00]
+    - Blank lines are converted to [00:00.00]
+    """
+    out: list[str] = []
+    for line in lyrics.splitlines():
+        stripped = line.strip()
+        if not stripped:
+            out.append("[00:00.00]")
+            continue
+        cleaned = _LRC_LINE_RE.sub("", stripped)
+        while _LRC_LINE_RE.match(cleaned):
+            cleaned = _LRC_LINE_RE.sub("", cleaned)
+        out.append(f"[00:00.00]{cleaned}")
+    return "\n".join(out)
+
+
+def get_audio_path(audio_url: str, ensure_exists: bool = False) -> Optional[Path]:
+    """Convert file:// URL to Path, return None if invalid or (if ensure_exists) file doesn't exist."""
+    if not audio_url.startswith("file://"):
+        return None
+    file_path = unquote(audio_url.replace("file://", "", 1))
+    path = Path(file_path)
+    if ensure_exists and not path.exists():
+        return None
+    return path
+
+
+def get_sidecar_path(
+    audio_url: str, ensure_audio_exists: bool = False, ensure_exists: bool = False
+) -> Optional[Path]:
+    """Given a file:// URL, return the corresponding .lrc sidecar path.
+
+    If ensure_audio_exists is True, return None if the audio file does not exist.
+    If ensure_exists is True, return None if the .lrc file does not exist.
+    """
+    audio_path = get_audio_path(audio_url, ensure_exists=ensure_audio_exists)
+    if not audio_path:
+        return None
+    lrc_path = audio_path.with_suffix(".lrc")
+    if ensure_exists and not lrc_path.exists():
+        return None
+    return lrc_path