fix: normalize time tags in fetched lrc (why [00:17:06]?)
This commit is contained in:
+14
-11
@@ -7,11 +7,11 @@ Fetch pipeline:
|
|||||||
4. Return the best result (synced > unsynced > None)
|
4. Return the best result (synced > unsynced > None)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from lrcfetch.models import TrackMeta, LyricResult, CacheStatus
|
from lrcfetch.models import TrackMeta, LyricResult, CacheStatus
|
||||||
from lrcfetch.config import TTL_SYNCED, TTL_UNSYNCED, TTL_NOT_FOUND, TTL_NETWORK_ERROR
|
from lrcfetch.config import TTL_SYNCED, TTL_UNSYNCED, TTL_NOT_FOUND, TTL_NETWORK_ERROR
|
||||||
|
from lrcfetch.lrc import LRC_LINE_RE, normalize_tags
|
||||||
from lrcfetch.cache import CacheEngine
|
from lrcfetch.cache import CacheEngine
|
||||||
from lrcfetch.fetchers.base import BaseFetcher
|
from lrcfetch.fetchers.base import BaseFetcher
|
||||||
from lrcfetch.fetchers.local import LocalFetcher
|
from lrcfetch.fetchers.local import LocalFetcher
|
||||||
@@ -20,10 +20,6 @@ from lrcfetch.fetchers.lrclib import LrclibFetcher
|
|||||||
from lrcfetch.fetchers.lrclib_search import LrclibSearchFetcher
|
from lrcfetch.fetchers.lrclib_search import LrclibSearchFetcher
|
||||||
from lrcfetch.fetchers.netease import NeteaseFetcher
|
from lrcfetch.fetchers.netease import NeteaseFetcher
|
||||||
|
|
||||||
# Matches any LRC time tag at the start of a line: [mm:ss.cc] or [mm:ss.ccc]
|
|
||||||
_LRC_LINE_RE = re.compile(r"^\[(\d{2}:\d{2}\.\d{2,3})\]", re.MULTILINE)
|
|
||||||
|
|
||||||
|
|
||||||
def _normalize_unsynced(lyrics: str) -> str:
|
def _normalize_unsynced(lyrics: str) -> str:
|
||||||
"""Normalize unsynced lyrics so every line has a [00:00.00] tag.
|
"""Normalize unsynced lyrics so every line has a [00:00.00] tag.
|
||||||
|
|
||||||
@@ -37,11 +33,9 @@ def _normalize_unsynced(lyrics: str) -> str:
|
|||||||
if not stripped:
|
if not stripped:
|
||||||
out.append("")
|
out.append("")
|
||||||
continue
|
continue
|
||||||
# Strip existing time tag(s) from the beginning
|
cleaned = LRC_LINE_RE.sub("", stripped)
|
||||||
cleaned = _LRC_LINE_RE.sub("", stripped)
|
while LRC_LINE_RE.match(cleaned):
|
||||||
# Could have multiple tags like [00:12.34][00:56.78]text
|
cleaned = LRC_LINE_RE.sub("", cleaned)
|
||||||
while _LRC_LINE_RE.match(cleaned):
|
|
||||||
cleaned = _LRC_LINE_RE.sub("", cleaned)
|
|
||||||
out.append(f"[00:00.00]{cleaned}")
|
out.append(f"[00:00.00]{cleaned}")
|
||||||
return "\n".join(out)
|
return "\n".join(out)
|
||||||
|
|
||||||
@@ -148,7 +142,16 @@ class LrcManager:
|
|||||||
logger.debug(f"[{source}] returned None (no result)")
|
logger.debug(f"[{source}] returned None (no result)")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Cache the result
|
# Normalize non-standard time tags [mm:ss:cc] → [mm:ss.cc]
|
||||||
|
if result.lyrics:
|
||||||
|
result = LyricResult(
|
||||||
|
status=result.status,
|
||||||
|
lyrics=normalize_tags(result.lyrics),
|
||||||
|
source=result.source,
|
||||||
|
ttl=result.ttl,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Cache the normalized result
|
||||||
ttl = result.ttl or _STATUS_TTL.get(result.status, TTL_NOT_FOUND)
|
ttl = result.ttl or _STATUS_TTL.get(result.status, TTL_NOT_FOUND)
|
||||||
self.cache.set(track, source, result, ttl_seconds=ttl)
|
self.cache.set(track, source, result, ttl_seconds=ttl)
|
||||||
|
|
||||||
|
|||||||
@@ -5,34 +5,15 @@ Priority:
|
|||||||
2. Embedded lyrics in audio metadata (FLAC, MP3 USLT/SYLT tags)
|
2. Embedded lyrics in audio metadata (FLAC, MP3 USLT/SYLT tags)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
|
||||||
import os
|
import os
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from lrcfetch.models import TrackMeta, LyricResult, CacheStatus
|
from lrcfetch.models import TrackMeta, LyricResult, CacheStatus
|
||||||
from lrcfetch.fetchers.base import BaseFetcher
|
from lrcfetch.fetchers.base import BaseFetcher
|
||||||
|
from lrcfetch.lrc import detect_sync_status
|
||||||
from mutagen._file import File
|
from mutagen._file import File
|
||||||
from mutagen.flac import FLAC
|
from mutagen.flac import FLAC
|
||||||
|
|
||||||
# Matches LRC time tags like [00:12.34] or [01:23.456]
|
|
||||||
_LRC_TIME_TAG_RE = re.compile(r"\[\d{2}:\d{2}\.\d{2,3}\]")
|
|
||||||
# Matches time tags that are all zeros
|
|
||||||
_ZERO_TIME_TAG_RE = re.compile(r"^\[00:00\.0{2,3}\]$")
|
|
||||||
|
|
||||||
|
|
||||||
def _detect_sync_status(text: str) -> CacheStatus:
|
|
||||||
"""Determine whether lyrics text contains meaningful LRC time tags.
|
|
||||||
|
|
||||||
Returns UNSYNCED if no tags exist or all tags are [00:00.00].
|
|
||||||
"""
|
|
||||||
tags = _LRC_TIME_TAG_RE.findall(text)
|
|
||||||
if not tags:
|
|
||||||
return CacheStatus.SUCCESS_UNSYNCED
|
|
||||||
for tag in tags:
|
|
||||||
if not _ZERO_TIME_TAG_RE.match(tag):
|
|
||||||
return CacheStatus.SUCCESS_SYNCED
|
|
||||||
return CacheStatus.SUCCESS_UNSYNCED
|
|
||||||
|
|
||||||
|
|
||||||
class LocalFetcher(BaseFetcher):
|
class LocalFetcher(BaseFetcher):
|
||||||
@property
|
@property
|
||||||
@@ -58,7 +39,7 @@ class LocalFetcher(BaseFetcher):
|
|||||||
with open(lrc_path, "r", encoding="utf-8") as f:
|
with open(lrc_path, "r", encoding="utf-8") as f:
|
||||||
content = f.read().strip()
|
content = f.read().strip()
|
||||||
if content:
|
if content:
|
||||||
status = _detect_sync_status(content)
|
status = detect_sync_status(content)
|
||||||
logger.info(f"Local: found .lrc sidecar ({status.value})")
|
logger.info(f"Local: found .lrc sidecar ({status.value})")
|
||||||
return LyricResult(
|
return LyricResult(
|
||||||
status=status, lyrics=content, source=self.source_name
|
status=status, lyrics=content, source=self.source_name
|
||||||
@@ -83,7 +64,7 @@ class LocalFetcher(BaseFetcher):
|
|||||||
break
|
break
|
||||||
|
|
||||||
if lyrics:
|
if lyrics:
|
||||||
status = _detect_sync_status(lyrics)
|
status = detect_sync_status(lyrics)
|
||||||
logger.info(f"Local: found embedded lyrics ({status.value})")
|
logger.info(f"Local: found embedded lyrics ({status.value})")
|
||||||
return LyricResult(
|
return LyricResult(
|
||||||
status=status,
|
status=status,
|
||||||
|
|||||||
@@ -7,12 +7,12 @@ Search results are filtered by duration when the track has a known length
|
|||||||
to avoid returning lyrics for the wrong version of a song.
|
to avoid returning lyrics for the wrong version of a song.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
|
||||||
import httpx
|
import httpx
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from lrcfetch.models import TrackMeta, LyricResult, CacheStatus
|
from lrcfetch.models import TrackMeta, LyricResult, CacheStatus
|
||||||
from lrcfetch.fetchers.base import BaseFetcher
|
from lrcfetch.fetchers.base import BaseFetcher
|
||||||
|
from lrcfetch.lrc import is_synced
|
||||||
from lrcfetch.config import (
|
from lrcfetch.config import (
|
||||||
HTTP_TIMEOUT,
|
HTTP_TIMEOUT,
|
||||||
TTL_NOT_FOUND,
|
TTL_NOT_FOUND,
|
||||||
@@ -23,34 +23,12 @@ from lrcfetch.config import (
|
|||||||
UA_BROWSER,
|
UA_BROWSER,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Matches LRC time tags like [00:12.34] or [01:23.456]
|
|
||||||
_LRC_TIME_TAG_RE = re.compile(r"\[\d{2}:\d{2}\.\d{2,3}\]")
|
|
||||||
# Matches time tags that are all zeros: [00:00.00] or [00:00.000]
|
|
||||||
_ZERO_TIME_TAG_RE = re.compile(r"^\[00:00\.0{2,3}\]")
|
|
||||||
|
|
||||||
_HEADERS = {
|
_HEADERS = {
|
||||||
"User-Agent": UA_BROWSER,
|
"User-Agent": UA_BROWSER,
|
||||||
"Referer": "https://music.163.com/",
|
"Referer": "https://music.163.com/",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def _is_synced_lrc(text: str) -> bool:
|
|
||||||
"""Check whether *text* contains actual LRC time tags with non-zero times.
|
|
||||||
|
|
||||||
Returns False if:
|
|
||||||
- No time tags at all
|
|
||||||
- All time tags are [00:00.00] (unsynced disguised as synced)
|
|
||||||
"""
|
|
||||||
lines_with_tags = _LRC_TIME_TAG_RE.findall(text)
|
|
||||||
if not lines_with_tags:
|
|
||||||
return False
|
|
||||||
# Check if ALL tags are zero — if so, it's unsynced
|
|
||||||
for tag in lines_with_tags:
|
|
||||||
if not _ZERO_TIME_TAG_RE.match(tag):
|
|
||||||
return True # Found at least one non-zero tag
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
class NeteaseFetcher(BaseFetcher):
|
class NeteaseFetcher(BaseFetcher):
|
||||||
@property
|
@property
|
||||||
def source_name(self) -> str:
|
def source_name(self) -> str:
|
||||||
@@ -186,7 +164,7 @@ class NeteaseFetcher(BaseFetcher):
|
|||||||
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
|
return LyricResult(status=CacheStatus.NOT_FOUND, ttl=TTL_NOT_FOUND)
|
||||||
|
|
||||||
# Determine sync status
|
# Determine sync status
|
||||||
synced = _is_synced_lrc(lrc)
|
synced = is_synced(lrc)
|
||||||
status = CacheStatus.SUCCESS_SYNCED if synced else CacheStatus.SUCCESS_UNSYNCED
|
status = CacheStatus.SUCCESS_SYNCED if synced else CacheStatus.SUCCESS_UNSYNCED
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Netease: got {status.value} lyrics for song_id={song_id} "
|
f"Netease: got {status.value} lyrics for song_id={song_id} "
|
||||||
|
|||||||
@@ -0,0 +1,46 @@
|
|||||||
|
"""Shared LRC time-tag utilities.
|
||||||
|
|
||||||
|
Handles detection, normalization, and sync-status checks for LRC lyrics.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
from lrcfetch.models import CacheStatus
|
||||||
|
|
||||||
|
# Standard format: [mm:ss.cc] or [mm:ss.ccc]
_STANDARD_TAG_RE = re.compile(r"\[\d{2}:\d{2}\.\d{2,3}\]")

# Non-standard format: [mm:ss:cc] (a second colon where the dot belongs).
# Group 1 captures "mm:ss", group 2 the fractional digits, so the tag can be
# rebuilt with the correct separator.
_COLON_TAG_RE = re.compile(r"\[(\d{2}:\d{2}):(\d{2,3})\]")

# Either format, anywhere in the text (not anchored to a line start)
_ANY_TAG_RE = re.compile(r"\[\d{2}:\d{2}[.:]\d{2,3}\]")

# Matches any LRC time tag (standard or non-standard) at start of line
LRC_LINE_RE = re.compile(r"^\[(\d{2}:\d{2}[.:]\d{2,3})\]", re.MULTILINE)

# All-zero tags, in either format
_ZERO_TAG_RE = re.compile(r"^\[00:00[.:]0{2,3}\]$")


def normalize_tags(text: str) -> str:
    """Convert non-standard time tags [mm:ss:cc] to standard [mm:ss.cc].

    Every colon-style tag in *text* is rewritten, wherever it appears on a
    line; tags already in the standard form and all other text are left
    unchanged.

    Args:
        text: Lyrics text that may contain time tags.

    Returns:
        The text with each [mm:ss:cc] tag replaced by [mm:ss.cc].
    """
    return _COLON_TAG_RE.sub(r"[\1.\2]", text)


def is_synced(text: str) -> bool:
    """Check whether text contains actual LRC time tags with non-zero times.

    Handles both [mm:ss.cc] and [mm:ss:cc] formats.

    Args:
        text: Lyrics text to inspect.

    Returns:
        True if at least one time tag is not all zeros. False if no tags
        exist or every tag is an all-zero tag such as [00:00.00].
    """
    # One pass with the combined pattern is enough: _ZERO_TAG_RE accepts both
    # separators, so colon-style tags need no rewriting before the zero check.
    return any(
        _ZERO_TAG_RE.match(tag) is None
        for tag in _ANY_TAG_RE.findall(text)
    )
|
||||||
|
|
||||||
|
|
||||||
|
def detect_sync_status(text: str) -> CacheStatus:
    """Map lyrics text to the cache status describing its sync state.

    Returns CacheStatus.SUCCESS_SYNCED when is_synced(text) is true,
    otherwise CacheStatus.SUCCESS_UNSYNCED.
    """
    if is_synced(text):
        return CacheStatus.SUCCESS_SYNCED
    return CacheStatus.SUCCESS_UNSYNCED
|
||||||
Reference in New Issue
Block a user