feat: better LRC format handling
This commit is contained in:
+3
-32
@@ -18,32 +18,12 @@ from loguru import logger
|
|||||||
from .fetchers import FetcherMethodType, create_fetchers
|
from .fetchers import FetcherMethodType, create_fetchers
|
||||||
from .fetchers.base import BaseFetcher
|
from .fetchers.base import BaseFetcher
|
||||||
from .cache import CacheEngine
|
from .cache import CacheEngine
|
||||||
from .lrc import LRC_LINE_RE, normalize_tags
|
from .lrc import normalize_unsynced
|
||||||
from .config import TTL_SYNCED, TTL_UNSYNCED, TTL_NOT_FOUND, TTL_NETWORK_ERROR
|
from .config import TTL_SYNCED, TTL_UNSYNCED, TTL_NOT_FOUND, TTL_NETWORK_ERROR
|
||||||
from .models import TrackMeta, LyricResult, CacheStatus
|
from .models import TrackMeta, LyricResult, CacheStatus
|
||||||
from .enrichers import enrich_track
|
from .enrichers import enrich_track
|
||||||
|
|
||||||
|
|
||||||
def _normalize_unsynced(lyrics: str) -> str:
    """Rewrite unsynced lyrics so each non-blank line starts with [00:00.00].

    - Leading time tags matched by LRC_LINE_RE are removed and a single
      [00:00.00] tag takes their place
    - Lines without a time tag simply gain the [00:00.00] prefix
    - Blank (whitespace-only) lines come out as empty lines
    """

    def _retag(raw: str) -> str:
        text = raw.strip()
        if not text:
            return ""
        # The pattern is anchored at the line start, so one substitution
        # removes one leading tag; repeat until no leading tag is left.
        text = LRC_LINE_RE.sub("", text)
        while LRC_LINE_RE.match(text):
            text = LRC_LINE_RE.sub("", text)
        return f"[00:00.00]{text}"

    return "\n".join(_retag(raw) for raw in lyrics.splitlines())
|
|
||||||
|
|
||||||
|
|
||||||
# Maps CacheStatus to the default TTL used when storing results
|
# Maps CacheStatus to the default TTL used when storing results
|
||||||
_STATUS_TTL: dict[CacheStatus, Optional[int]] = {
|
_STATUS_TTL: dict[CacheStatus, Optional[int]] = {
|
||||||
CacheStatus.SUCCESS_SYNCED: TTL_SYNCED,
|
CacheStatus.SUCCESS_SYNCED: TTL_SYNCED,
|
||||||
@@ -149,16 +129,7 @@ class LrcManager:
|
|||||||
logger.debug(f"[{source}] returned None (no result)")
|
logger.debug(f"[{source}] returned None (no result)")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Normalize non-standard time tags [mm:ss:cc] → [mm:ss.cc]
|
# Cache the result (skip for self-cached fetchers)
|
||||||
if result.lyrics:
|
|
||||||
result = LyricResult(
|
|
||||||
status=result.status,
|
|
||||||
lyrics=normalize_tags(result.lyrics),
|
|
||||||
source=result.source,
|
|
||||||
ttl=result.ttl,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Cache the normalized result (skip for self-cached fetchers)
|
|
||||||
if not fetcher.self_cached:
|
if not fetcher.self_cached:
|
||||||
ttl = result.ttl or _STATUS_TTL.get(result.status, TTL_NOT_FOUND)
|
ttl = result.ttl or _STATUS_TTL.get(result.status, TTL_NOT_FOUND)
|
||||||
self.cache.set(track, source, result, ttl_seconds=ttl)
|
self.cache.set(track, source, result, ttl_seconds=ttl)
|
||||||
@@ -184,7 +155,7 @@ class LrcManager:
|
|||||||
):
|
):
|
||||||
best_result = LyricResult(
|
best_result = LyricResult(
|
||||||
status=best_result.status,
|
status=best_result.status,
|
||||||
lyrics=_normalize_unsynced(best_result.lyrics),
|
lyrics=normalize_unsynced(best_result.lyrics),
|
||||||
source=best_result.source,
|
source=best_result.source,
|
||||||
ttl=best_result.ttl,
|
ttl=best_result.ttl,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ from mutagen.flac import FLAC
|
|||||||
|
|
||||||
from .base import BaseFetcher
|
from .base import BaseFetcher
|
||||||
from ..models import TrackMeta, LyricResult
|
from ..models import TrackMeta, LyricResult
|
||||||
from ..lrc import detect_sync_status, get_audio_path, get_sidecar_path
|
from ..lrc import detect_sync_status, normalize_tags, get_audio_path, get_sidecar_path
|
||||||
|
|
||||||
|
|
||||||
class LocalFetcher(BaseFetcher):
|
class LocalFetcher(BaseFetcher):
|
||||||
@@ -45,6 +45,7 @@ class LocalFetcher(BaseFetcher):
|
|||||||
with open(lrc_path, "r", encoding="utf-8") as f:
|
with open(lrc_path, "r", encoding="utf-8") as f:
|
||||||
content = f.read().strip()
|
content = f.read().strip()
|
||||||
if content:
|
if content:
|
||||||
|
content = normalize_tags(content)
|
||||||
status = detect_sync_status(content)
|
status = detect_sync_status(content)
|
||||||
logger.info(f"Local: found .lrc sidecar ({status.value})")
|
logger.info(f"Local: found .lrc sidecar ({status.value})")
|
||||||
return LyricResult(
|
return LyricResult(
|
||||||
@@ -77,11 +78,12 @@ class LocalFetcher(BaseFetcher):
|
|||||||
break
|
break
|
||||||
|
|
||||||
if lyrics:
|
if lyrics:
|
||||||
|
lyrics = normalize_tags(lyrics.strip())
|
||||||
status = detect_sync_status(lyrics)
|
status = detect_sync_status(lyrics)
|
||||||
logger.info(f"Local: found embedded lyrics ({status.value})")
|
logger.info(f"Local: found embedded lyrics ({status.value})")
|
||||||
return LyricResult(
|
return LyricResult(
|
||||||
status=status,
|
status=status,
|
||||||
lyrics=lyrics.strip(),
|
lyrics=lyrics,
|
||||||
source=f"{self.source_name} (embedded)",
|
source=f"{self.source_name} (embedded)",
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ from urllib.parse import urlencode
|
|||||||
|
|
||||||
from .base import BaseFetcher
|
from .base import BaseFetcher
|
||||||
from ..models import TrackMeta, LyricResult, CacheStatus
|
from ..models import TrackMeta, LyricResult, CacheStatus
|
||||||
|
from ..lrc import normalize_tags
|
||||||
from ..config import (
|
from ..config import (
|
||||||
HTTP_TIMEOUT,
|
HTTP_TIMEOUT,
|
||||||
TTL_UNSYNCED,
|
TTL_UNSYNCED,
|
||||||
@@ -75,21 +76,23 @@ class LrclibFetcher(BaseFetcher):
|
|||||||
unsynced = data.get("plainLyrics")
|
unsynced = data.get("plainLyrics")
|
||||||
|
|
||||||
if isinstance(synced, str) and synced.strip():
|
if isinstance(synced, str) and synced.strip():
|
||||||
|
lyrics = normalize_tags(synced.strip())
|
||||||
logger.info(
|
logger.info(
|
||||||
f"LRCLIB: got synced lyrics ({len(synced.splitlines())} lines)"
|
f"LRCLIB: got synced lyrics ({len(lyrics.splitlines())} lines)"
|
||||||
)
|
)
|
||||||
return LyricResult(
|
return LyricResult(
|
||||||
status=CacheStatus.SUCCESS_SYNCED,
|
status=CacheStatus.SUCCESS_SYNCED,
|
||||||
lyrics=synced.strip(),
|
lyrics=lyrics,
|
||||||
source=self.source_name,
|
source=self.source_name,
|
||||||
)
|
)
|
||||||
elif isinstance(unsynced, str) and unsynced.strip():
|
elif isinstance(unsynced, str) and unsynced.strip():
|
||||||
|
lyrics = normalize_tags(unsynced.strip())
|
||||||
logger.info(
|
logger.info(
|
||||||
f"LRCLIB: got unsynced lyrics ({len(unsynced.splitlines())} lines)"
|
f"LRCLIB: got unsynced lyrics ({len(lyrics.splitlines())} lines)"
|
||||||
)
|
)
|
||||||
return LyricResult(
|
return LyricResult(
|
||||||
status=CacheStatus.SUCCESS_UNSYNCED,
|
status=CacheStatus.SUCCESS_UNSYNCED,
|
||||||
lyrics=unsynced.strip(),
|
lyrics=lyrics,
|
||||||
source=self.source_name,
|
source=self.source_name,
|
||||||
ttl=TTL_UNSYNCED,
|
ttl=TTL_UNSYNCED,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ from urllib.parse import urlencode
|
|||||||
|
|
||||||
from .base import BaseFetcher
|
from .base import BaseFetcher
|
||||||
from ..models import TrackMeta, LyricResult, CacheStatus
|
from ..models import TrackMeta, LyricResult, CacheStatus
|
||||||
|
from ..lrc import normalize_tags
|
||||||
from ..config import (
|
from ..config import (
|
||||||
HTTP_TIMEOUT,
|
HTTP_TIMEOUT,
|
||||||
TTL_UNSYNCED,
|
TTL_UNSYNCED,
|
||||||
@@ -78,21 +79,23 @@ class LrclibSearchFetcher(BaseFetcher):
|
|||||||
unsynced = best.get("plainLyrics")
|
unsynced = best.get("plainLyrics")
|
||||||
|
|
||||||
if isinstance(synced, str) and synced.strip():
|
if isinstance(synced, str) and synced.strip():
|
||||||
|
lyrics = normalize_tags(synced.strip())
|
||||||
logger.info(
|
logger.info(
|
||||||
f"LRCLIB-search: got synced lyrics ({len(synced.splitlines())} lines)"
|
f"LRCLIB-search: got synced lyrics ({len(lyrics.splitlines())} lines)"
|
||||||
)
|
)
|
||||||
return LyricResult(
|
return LyricResult(
|
||||||
status=CacheStatus.SUCCESS_SYNCED,
|
status=CacheStatus.SUCCESS_SYNCED,
|
||||||
lyrics=synced.strip(),
|
lyrics=lyrics,
|
||||||
source=self.source_name,
|
source=self.source_name,
|
||||||
)
|
)
|
||||||
elif isinstance(unsynced, str) and unsynced.strip():
|
elif isinstance(unsynced, str) and unsynced.strip():
|
||||||
|
lyrics = normalize_tags(unsynced.strip())
|
||||||
logger.info(
|
logger.info(
|
||||||
f"LRCLIB-search: got unsynced lyrics ({len(unsynced.splitlines())} lines)"
|
f"LRCLIB-search: got unsynced lyrics ({len(lyrics.splitlines())} lines)"
|
||||||
)
|
)
|
||||||
return LyricResult(
|
return LyricResult(
|
||||||
status=CacheStatus.SUCCESS_UNSYNCED,
|
status=CacheStatus.SUCCESS_UNSYNCED,
|
||||||
lyrics=unsynced.strip(),
|
lyrics=lyrics,
|
||||||
source=self.source_name,
|
source=self.source_name,
|
||||||
ttl=TTL_UNSYNCED,
|
ttl=TTL_UNSYNCED,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ from loguru import logger
|
|||||||
|
|
||||||
from .base import BaseFetcher
|
from .base import BaseFetcher
|
||||||
from ..models import TrackMeta, LyricResult, CacheStatus
|
from ..models import TrackMeta, LyricResult, CacheStatus
|
||||||
from ..lrc import is_synced
|
from ..lrc import detect_sync_status, normalize_tags
|
||||||
from ..config import (
|
from ..config import (
|
||||||
HTTP_TIMEOUT,
|
HTTP_TIMEOUT,
|
||||||
TTL_NOT_FOUND,
|
TTL_NOT_FOUND,
|
||||||
@@ -178,10 +178,8 @@ class NeteaseFetcher(BaseFetcher):
|
|||||||
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
|
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
|
||||||
|
|
||||||
# Determine sync status
|
# Determine sync status
|
||||||
synced = is_synced(lrc)
|
lrc = normalize_tags(lrc)
|
||||||
status = (
|
status = detect_sync_status(lrc)
|
||||||
CacheStatus.SUCCESS_SYNCED if synced else CacheStatus.SUCCESS_UNSYNCED
|
|
||||||
)
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Netease: got {status.value} lyrics for song_id={song_id} "
|
f"Netease: got {status.value} lyrics for song_id={song_id} "
|
||||||
f"({len(lrc.splitlines())} lines)"
|
f"({len(lrc.splitlines())} lines)"
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ from loguru import logger
|
|||||||
|
|
||||||
from .base import BaseFetcher
|
from .base import BaseFetcher
|
||||||
from ..models import TrackMeta, LyricResult, CacheStatus
|
from ..models import TrackMeta, LyricResult, CacheStatus
|
||||||
from ..lrc import is_synced
|
from ..lrc import detect_sync_status, normalize_tags
|
||||||
from ..config import (
|
from ..config import (
|
||||||
HTTP_TIMEOUT,
|
HTTP_TIMEOUT,
|
||||||
TTL_NOT_FOUND,
|
TTL_NOT_FOUND,
|
||||||
@@ -139,10 +139,8 @@ class QQMusicFetcher(BaseFetcher):
|
|||||||
logger.debug(f"QQMusic: empty lyrics for mid={mid}")
|
logger.debug(f"QQMusic: empty lyrics for mid={mid}")
|
||||||
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
|
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
|
||||||
|
|
||||||
synced = is_synced(lrc)
|
lrc = normalize_tags(lrc)
|
||||||
status = (
|
status = detect_sync_status(lrc)
|
||||||
CacheStatus.SUCCESS_SYNCED if synced else CacheStatus.SUCCESS_UNSYNCED
|
|
||||||
)
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"QQMusic: got {status.value} lyrics for mid={mid} "
|
f"QQMusic: got {status.value} lyrics for mid={mid} "
|
||||||
f"({len(lrc.splitlines())} lines)"
|
f"({len(lrc.splitlines())} lines)"
|
||||||
|
|||||||
@@ -28,6 +28,7 @@ from loguru import logger
|
|||||||
|
|
||||||
from .base import BaseFetcher
|
from .base import BaseFetcher
|
||||||
from ..models import TrackMeta, LyricResult, CacheStatus
|
from ..models import TrackMeta, LyricResult, CacheStatus
|
||||||
|
from ..lrc import normalize_tags
|
||||||
from ..config import (
|
from ..config import (
|
||||||
HTTP_TIMEOUT,
|
HTTP_TIMEOUT,
|
||||||
SPOTIFY_APP_VERSION,
|
SPOTIFY_APP_VERSION,
|
||||||
@@ -354,7 +355,7 @@ class SpotifyFetcher(BaseFetcher):
|
|||||||
# Unsynced: emit with zero timestamps
|
# Unsynced: emit with zero timestamps
|
||||||
lrc_lines.append(f"[00:00.00]{words}")
|
lrc_lines.append(f"[00:00.00]{words}")
|
||||||
|
|
||||||
content = "\n".join(lrc_lines)
|
content = normalize_tags("\n".join(lrc_lines))
|
||||||
status = (
|
status = (
|
||||||
CacheStatus.SUCCESS_SYNCED
|
CacheStatus.SUCCESS_SYNCED
|
||||||
if is_synced
|
if is_synced
|
||||||
|
|||||||
+101
-49
@@ -1,7 +1,7 @@
|
|||||||
"""
|
"""
|
||||||
Author: Uyanide pywang0608@foxmail.com
|
Author: Uyanide pywang0608@foxmail.com
|
||||||
Date: 2026-03-25 21:54:01
|
Date: 2026-03-25 21:54:01
|
||||||
Description: Shared LRC time-tag utilities
|
Description: Shared LRC time-tag utilities (definitely overengineered)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
import re
|
||||||
@@ -11,93 +11,145 @@ from urllib.parse import unquote
|
|||||||
|
|
||||||
from .models import CacheStatus
|
from .models import CacheStatus
|
||||||
|
|
||||||
# Standard format: [mm:ss.cc] or [mm:ss.ccc]
|
# Parses any time tag input format:
|
||||||
_STANDARD_TAG_RE = re.compile(r"\[\d{2}:\d{2}\.\d{2,3}\]")
|
# [mm:ss], [mm:ss.c], [mm:ss.cc], [mm:ss.ccc], [mm:ss:cc], …
|
||||||
|
_RAW_TAG_RE = re.compile(r"\[(\d{2,}):(\d{2})(?:[.:](\d{1,3}))?\]")
|
||||||
|
|
||||||
# Non-standard format: [mm:ss:cc] (two colons instead of dot)
|
# Standard format after normalization: [mm:ss.cc]
|
||||||
_COLON_TAG_RE = re.compile(r"\[(\d{2}:\d{2}):(\d{2,3})\]")
|
_STD_TAG_RE = re.compile(r"\[\d{2,}:\d{2}\.\d{2}\]")
|
||||||
|
|
||||||
# Matches any LRC time tag (standard or non-standard) at start of line
|
# Standard format with capture groups
|
||||||
LRC_LINE_RE = re.compile(r"^\[(\d{2}:\d{2}[.:]\d{2,3})\]", re.MULTILINE)
|
_STD_TAG_CAPTURE_RE = re.compile(r"\[(\d{2,}):(\d{2})\.(\d{2})\]")
|
||||||
|
|
||||||
# All-zero tags
|
# Matches a standard time tag at the start of a line
|
||||||
_ZERO_TAG_RE = re.compile(r"^\[00:00[.:]0{2,3}\]$")
|
_LRC_LINE_RE = re.compile(r"^\[\d{2,}:\d{2}\.\d{2}\]", re.MULTILINE)
|
||||||
|
|
||||||
# [offset:+/-xxx] tag — value in milliseconds
|
# [offset:+/-xxx] tag — value in milliseconds
|
||||||
_OFFSET_RE = re.compile(r"^\[offset:\s*([+-]?\d+)\]\s*$", re.MULTILINE | re.IGNORECASE)
|
_OFFSET_RE = re.compile(r"^\[offset:\s*([+-]?\d+)\]\s*$", re.MULTILINE | re.IGNORECASE)
|
||||||
|
|
||||||
# Time tag for offset application: captures mm, ss, cc/ccc
|
|
||||||
_TIME_TAG_RE = re.compile(r"\[(\d{2}):(\d{2})\.(\d{2,3})\]")
|
def _raw_tag_to_cs(mm: str, ss: str, frac: Optional[str]) -> str:
|
||||||
|
"""Convert parsed time tag components to standard [mm:ss.cc] string."""
|
||||||
|
if frac is None:
|
||||||
|
ms = 0
|
||||||
|
else:
|
||||||
|
# cc in [mm:ss:cc] is also treated as centiseconds, per LRC spec
|
||||||
|
# ^
|
||||||
|
# why does this format even exist, idk
|
||||||
|
n = len(frac)
|
||||||
|
if n == 1:
|
||||||
|
ms = int(frac) * 100
|
||||||
|
elif n == 2:
|
||||||
|
ms = int(frac) * 10
|
||||||
|
else:
|
||||||
|
ms = int(frac)
|
||||||
|
cs = min(round(ms / 10), 99)
|
||||||
|
return f"[{mm}:{ss}.{cs:02d}]"
|
||||||
|
|
||||||
|
|
||||||
|
def _reformat(text: str) -> str:
    """Rewrite each line so its leading time tags use the standard [mm:ss.cc] form.

    Leading time tags (any format accepted by _RAW_TAG_RE) are parsed one
    after another, converted with _raw_tag_to_cs and emitted back to back,
    followed by the rest of the line with the whitespace after the tags
    removed. Lines that do not start with a time tag, including non-time
    tags and empty lines, are only stripped of surrounding whitespace.

    Whitespace between consecutive leading tags is skipped. Any whitespace
    character counts (not only the space character), so tab-separated tags
    are all normalized.

    Args:
        text: Raw LRC text, possibly mixing several time tag formats.

    Returns:
        The reformatted lines joined with "\\n".
    """
    out: list[str] = []
    for raw_line in text.splitlines():
        line = raw_line.strip()
        pos = 0
        tags: list[str] = []
        while True:
            # Tolerate whitespace before and between the leading tags.
            while pos < len(line) and line[pos].isspace():
                pos += 1
            m = _RAW_TAG_RE.match(line, pos)
            if m is None:
                # No more time tags at this position.
                break
            tags.append(_raw_tag_to_cs(m.group(1), m.group(2), m.group(3)))
            pos = m.end()
        if tags:
            # Only the leading tags are rewritten. This could break lines of
            # word-synced LRC variants, which are not meant to be supported.
            # pos already sits past the whitespace that followed the tags.
            out.append("".join(tags) + line[pos:])
        else:
            # Untagged lines, empty ones included, are kept (stripped).
            out.append(line)
    return "\n".join(out)
|
||||||
|
|
||||||
|
|
||||||
def _apply_offset(text: str) -> str:
    """Apply the [offset:±ms] tag to all standard [mm:ss.cc] time tags.

    The first offset tag found supplies the shift in milliseconds; every
    offset tag is removed from the text. Per the LRC convention a positive
    offset makes lyrics appear sooner, so the value is subtracted from each
    timestamp. Shifted times never go below zero.

    Args:
        text: LRC text whose time tags are already in [mm:ss.cc] form.

    Returns:
        The text without offset tags and with shifted time tags. The input
        is returned unchanged when it contains no offset tag.
    """
    m = _OFFSET_RE.search(text)
    if m is None:
        return text
    offset_ms = int(m.group(1))
    # Drop the tag itself, plus the empty first/last line it may leave behind.
    text = _OFFSET_RE.sub("", text).strip("\n")
    if offset_ms == 0:
        return text

    def _shift(match: re.Match) -> str:
        minutes, seconds, centis = (int(g) for g in match.groups())
        total_ms = max(0, (minutes * 60 + seconds) * 1000 + centis * 10 - offset_ms)
        # Round the whole timestamp to centiseconds (half-up, integer math)
        # before splitting it, so a value such as 1995 ms carries over to
        # [00:02.00] instead of being clamped to [00:01.99].
        total_cs = (total_ms + 5) // 10
        new_mm, rest = divmod(total_cs, 6000)
        new_ss, new_cs = divmod(rest, 100)
        return f"[{new_mm:02d}:{new_ss:02d}.{new_cs:02d}]"

    return _STD_TAG_CAPTURE_RE.sub(_shift, text)
|
||||||
|
|
||||||
|
|
||||||
def normalize_tags(text: str) -> str:
    """Bring LRC text into standard form.

    All time tags are first rewritten as [mm:ss.cc] by _reformat; the result
    is then passed through _apply_offset.
    """
    reformatted = _reformat(text)
    return _apply_offset(reformatted)
|
|
||||||
|
|
||||||
|
|
||||||
def is_synced(text: str) -> bool:
    """Report whether text has at least one time tag other than [00:00.00].

    Only standard [mm:ss.cc] tags are recognized, so text is expected to have
    gone through normalize_tags first. Text without any tag is not synced.
    """
    for tag in _STD_TAG_RE.findall(text):
        if tag != "[00:00.00]":
            return True
    return False
|
|
||||||
|
|
||||||
|
|
||||||
def detect_sync_status(text: str) -> CacheStatus:
    """Classify lyrics as synced or unsynced based on their time tags.

    Expects text that has already been normalized by normalize_tags.
    """
    if is_synced(text):
        return CacheStatus.SUCCESS_SYNCED
    return CacheStatus.SUCCESS_UNSYNCED
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_unsynced(lyrics: str) -> str:
    """Give every line of unsynced lyrics exactly one leading [00:00.00] tag.

    - Leading standard time tags are dropped and replaced by [00:00.00]
    - Lines without a time tag get [00:00.00] prepended
    - Blank lines become a bare [00:00.00]
    """
    zero_tag = "[00:00.00]"
    result: list[str] = []
    for raw in lyrics.splitlines():
        body = raw.strip()
        # Peel off leading tags one by one; a blank line has none and ends
        # up as the zero tag alone.
        while (hit := _LRC_LINE_RE.match(body)) is not None:
            body = body[hit.end():]
        result.append(zero_tag + body)
    return "\n".join(result)
|
||||||
|
|
||||||
|
|
||||||
def get_audio_path(audio_url: str, ensure_exists: bool = False) -> Optional[Path]:
|
def get_audio_path(audio_url: str, ensure_exists: bool = False) -> Optional[Path]:
|
||||||
"""Convert file:// URL to Path, return None if invalid or (if ensure_exists) file doesn't exist."""
|
"""Convert file:// URL to Path, return None if invalid or (if ensure_exists) file doesn't exist."""
|
||||||
if not audio_url.startswith("file://"):
|
if not audio_url.startswith("file://"):
|
||||||
|
|||||||
+1
-1
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "lrcfetch"
|
name = "lrcfetch"
|
||||||
version = "0.1.5"
|
version = "0.1.6"
|
||||||
description = "Fetch line-synced lyrics for your music player."
|
description = "Fetch line-synced lyrics for your music player."
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.13"
|
requires-python = ">=3.13"
|
||||||
|
|||||||
Reference in New Issue
Block a user