From 4e83e6be1522159818315640d0d1ec40b893c938 Mon Sep 17 00:00:00 2001 From: Uyanide Date: Tue, 31 Mar 2026 06:08:16 +0200 Subject: [PATCH] feat: add metadata enrichers & refactor --- README.md | 6 +++ lrcfetch/__init__.py | 2 +- lrcfetch/cli.py | 25 ++++++++- lrcfetch/core.py | 56 ++++---------------- lrcfetch/enrichers/__init__.py | 39 ++++++++++++++ lrcfetch/enrichers/audio_tag.py | 78 ++++++++++++++++++++++++++++ lrcfetch/enrichers/base.py | 31 +++++++++++ lrcfetch/enrichers/file_name.py | 83 ++++++++++++++++++++++++++++++ lrcfetch/fetchers/__init__.py | 54 +++++++++++++++++++ lrcfetch/fetchers/base.py | 9 +++- lrcfetch/fetchers/cache_search.py | 12 ++++- lrcfetch/fetchers/local.py | 4 +- lrcfetch/fetchers/lrclib.py | 4 +- lrcfetch/fetchers/lrclib_search.py | 4 +- lrcfetch/fetchers/netease.py | 4 +- lrcfetch/fetchers/qqmusic.py | 4 +- lrcfetch/fetchers/spotify.py | 4 +- pyproject.toml | 2 +- uv.lock | 2 +- 19 files changed, 363 insertions(+), 60 deletions(-) create mode 100644 lrcfetch/enrichers/__init__.py create mode 100644 lrcfetch/enrichers/audio_tag.py create mode 100644 lrcfetch/enrichers/base.py create mode 100644 lrcfetch/enrichers/file_name.py diff --git a/README.md b/README.md index 07c7510..b7e889b 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,12 @@ See `lrcfetch --help` for full command reference. Common use cases: lrcfetch search --trackid "5p0ietGkLNEqx1Z7ijkw5g" ``` + or for a local file: + + ```bash + lrcfetch search --path "/path/to/Westlife - My Love.flac" + ``` + - Export to sidecar `.lrc` file: ```bash diff --git a/lrcfetch/__init__.py b/lrcfetch/__init__.py index bbab024..1276d02 100644 --- a/lrcfetch/__init__.py +++ b/lrcfetch/__init__.py @@ -1 +1 @@ -__version__ = "0.1.4" +__version__ = "0.1.5" diff --git a/lrcfetch/cli.py b/lrcfetch/cli.py index 0ddc2bf..775bb10 100644 --- a/lrcfetch/cli.py +++ b/lrcfetch/cli.py @@ -7,14 +7,17 @@ Description: CLI interface import sys import time import os +from pathlib import Path from typing import Annotated +from urllib.parse import quote import cyclopts from loguru import logger from .config import enable_debug from .models import TrackMeta, CacheStatus from .mpris import get_current_track -from .core import LrcManager, FetcherMethodType +from .core import LrcManager +from .fetchers import FetcherMethodType from .lrc import get_sidecar_path @@ -122,7 +125,17 @@ def search( ), ] = None, url: Annotated[ - str | None, cyclopts.Parameter(help="Local file URL (file:///...).") + str | None, + cyclopts.Parameter( + help="Local file URL (file:///...). Mutually exclusive with --path." + ), + ] = None, + path: Annotated[ + str | None, + cyclopts.Parameter( + name=["--path"], + help="Local audio file path. Mutually exclusive with --url.", + ), ] = None, method: Annotated[ FetcherMethodType | None, cyclopts.Parameter(help="Force a specific source.") @@ -141,6 +154,14 @@ def search( ] = False, ): """Search for lyrics by metadata (bypasses MPRIS).""" + if url and path: + logger.error("--url and --path are mutually exclusive.") + sys.exit(1) + + if path: + resolved = str(Path(path).resolve()) + url = "file://" + quote(resolved, safe="/") + track = TrackMeta( title=title, artist=artist, diff --git a/lrcfetch/core.py b/lrcfetch/core.py index 2bb59ae..de92f9a 100644 --- a/lrcfetch/core.py +++ b/lrcfetch/core.py @@ -14,39 +14,14 @@ Fetch pipeline: from typing import Optional from loguru import logger -from typing import Literal -from .fetchers.netease import NeteaseFetcher -from .fetchers.qqmusic import QQMusicFetcher -from .fetchers.lrclib_search import LrclibSearchFetcher -from .fetchers.lrclib import LrclibFetcher -from .fetchers.spotify import SpotifyFetcher -from .fetchers.local import LocalFetcher -from .fetchers.cache_search import CacheSearchFetcher +from .fetchers import FetcherMethodType, create_fetchers from .fetchers.base import BaseFetcher from .cache import CacheEngine from .lrc import LRC_LINE_RE, normalize_tags from .config import TTL_SYNCED, TTL_UNSYNCED, TTL_NOT_FOUND, TTL_NETWORK_ERROR from .models import TrackMeta, LyricResult, CacheStatus - -METHODS = ( - "local", - "cache-search", - "spotify", - "lrclib", - "lrclib-search", - "netease", - "qqmusic", -) -FetcherMethodType = Literal[ - "local", - "cache-search", - "spotify", - "lrclib", - "lrclib-search", - "netease", - "qqmusic", -] +from .enrichers import enrich_track def _normalize_unsynced(lyrics: str) -> str: @@ -81,23 +56,9 @@ _STATUS_TTL: dict[CacheStatus, Optional[int]] = { class LrcManager: """Main entry point for fetching lyrics with caching.""" - # Fetchers that manage their own cache logic (skip per-source cache check) - _SELF_CACHED = frozenset({"cache-search"}) - def __init__(self) -> None: self.cache = CacheEngine() - self.fetchers: dict[FetcherMethodType, BaseFetcher] = { - "local": LocalFetcher(), - "cache-search": CacheSearchFetcher(self.cache), - "spotify": SpotifyFetcher(), - "lrclib": LrclibFetcher(), - "lrclib-search": LrclibSearchFetcher(), - "netease": NeteaseFetcher(), - "qqmusic": QQMusicFetcher(), - } - assert set(self.fetchers) == set(METHODS), ( - f"METHODS and fetchers out of sync: {set(METHODS) ^ set(self.fetchers)}" - ) + self.fetchers = create_fetchers(self.cache) def _build_sequence( self, track: TrackMeta, force_method: Optional[FetcherMethodType] = None @@ -142,6 +103,7 @@ class LrcManager: After all sources are tried, returns the best result found (synced > unsynced > None). """ + track = enrich_track(track) logger.info(f"Fetching lyrics for: {track.display_name()}") sequence = self._build_sequence(track, force_method) @@ -155,7 +117,7 @@ class LrcManager: source = fetcher.source_name # Cache check (skip for fetchers that handle their own caching) - if not bypass_cache and source not in self._SELF_CACHED: + if not bypass_cache and not fetcher.self_cached: cached = self.cache.get(track, source) if cached: if cached.status == CacheStatus.SUCCESS_SYNCED: @@ -176,12 +138,12 @@ class LrcManager: f"[{source}] cache hit: {cached.status.value}, skipping" ) continue - else: + elif not fetcher.self_cached: logger.debug(f"[{source}] cache bypassed") # Fetch logger.debug(f"[{source}] calling fetcher...") - result = fetcher.fetch(track) + result = fetcher.fetch(track, bypass_cache=bypass_cache) if not result: logger.debug(f"[{source}] returned None (no result)") @@ -196,8 +158,8 @@ class LrcManager: ttl=result.ttl, ) - # Cache the normalized result (skip for read-only fetchers) - if source not in self._SELF_CACHED: + # Cache the normalized result (skip for self-cached fetchers) + if not fetcher.self_cached: ttl = result.ttl or _STATUS_TTL.get(result.status, TTL_NOT_FOUND) self.cache.set(track, source, result, ttl_seconds=ttl) diff --git a/lrcfetch/enrichers/__init__.py b/lrcfetch/enrichers/__init__.py new file mode 100644 index 0000000..25309e2 --- /dev/null +++ b/lrcfetch/enrichers/__init__.py @@ -0,0 +1,39 @@ +""" +Author: Uyanide pywang0608@foxmail.com +Date: 2026-03-31 06:09:11 +Description: Metadata enrichment pipeline +""" + +from loguru import logger + +from .base import BaseEnricher +from .audio_tag import AudioTagEnricher +from .file_name import FileNameEnricher +from ..models import TrackMeta + +# Enrichers run in order; earlier ones have higher priority. +_ENRICHERS: list[BaseEnricher] = [ + AudioTagEnricher(), + FileNameEnricher(), +] + + +def enrich_track(track: TrackMeta) -> TrackMeta: + """Run all enrichers and return a track with missing fields filled in. + + Each enricher sees the cumulative state (earlier enrichers' results + are already applied). A field is only set if it is currently None. + """ + for enricher in _ENRICHERS: + try: + result = enricher.enrich(track) + except Exception as e: + logger.warning(f"Enricher {enricher.name} failed: {e}") + continue + if not result: + continue + # Only apply fields that are still None + updates = {k: v for k, v in result.items() if getattr(track, k, None) is None} + if updates: + track = track.model_copy(update=updates) + return track diff --git a/lrcfetch/enrichers/audio_tag.py b/lrcfetch/enrichers/audio_tag.py new file mode 100644 index 0000000..4e9f604 --- /dev/null +++ b/lrcfetch/enrichers/audio_tag.py @@ -0,0 +1,78 @@ +""" +Author: Uyanide pywang0608@foxmail.com +Date: 2026-03-31 06:11:27 +Description: Enricher that reads metadata from audio file tags (mutagen) +""" + +from typing import Optional +from loguru import logger +from mutagen._file import File, FileType + +from .base import BaseEnricher +from ..models import TrackMeta +from ..lrc import get_audio_path + + +class AudioTagEnricher(BaseEnricher): + """Extract title, artist, album, and duration from audio file tags.""" + + @property + def name(self) -> str: + return "audio-tag" + + def enrich(self, track: TrackMeta) -> Optional[dict]: + if not track.is_local or not track.url: + return None + + audio_path = get_audio_path(track.url, ensure_exists=True) + if not audio_path: + return None + + try: + audio = File(audio_path) + except Exception as e: + logger.debug(f"AudioTag: failed to read {audio_path}: {e}") + return None + + if audio is None: + return None + + updates: dict = {} + + # Try common tag names (vorbis comments, ID3, MP4) + title = _first_tag(audio, "title", "TIT2", "\xa9nam") + if title and not track.title: + updates["title"] = title + + artist = _first_tag(audio, "artist", "TPE1", "\xa9ART") + if artist and not track.artist: + updates["artist"] = artist + + album = _first_tag(audio, "album", "TALB", "\xa9alb") + if album and not track.album: + updates["album"] = album + + if not track.length and audio.info and hasattr(audio.info, "length"): + length_ms = int(audio.info.length * 1000) + if length_ms > 0: + updates["length"] = length_ms + + if updates: + logger.debug(f"AudioTag: enriched fields: {list(updates.keys())}") + return updates or None + + +def _first_tag(audio: FileType, *keys: str) -> Optional[str]: + """Return the first non-empty string value found among the given tag keys.""" + if not audio.tags: + return None + for key in keys: + val = audio.tags.get(key) + if val is None: + continue + # mutagen returns lists for vorbis, single values for ID3 + if isinstance(val, list): + val = val[0] if val else None + if val: + return str(val).strip() + return None diff --git a/lrcfetch/enrichers/base.py b/lrcfetch/enrichers/base.py new file mode 100644 index 0000000..f0a09da --- /dev/null +++ b/lrcfetch/enrichers/base.py @@ -0,0 +1,31 @@ +""" +Author: Uyanide pywang0608@foxmail.com +Date: 2026-03-31 06:08:16 +Description: Base class for metadata enrichers +""" + +from abc import ABC, abstractmethod +from typing import Optional + +from ..models import TrackMeta + + +class BaseEnricher(ABC): + """Attempts to fill missing fields on a TrackMeta. + + Each enricher inspects the track, and returns a dict of field names + to values for any fields it can provide. Only fields that are + currently ``None`` on the track will actually be applied. + """ + + @property + @abstractmethod + def name(self) -> str: ... + + @abstractmethod + def enrich(self, track: TrackMeta) -> Optional[dict]: + """Return a dict of {field_name: value} for fields this enricher can fill. + + Return None or an empty dict if nothing can be contributed. + """ + ... diff --git a/lrcfetch/enrichers/file_name.py b/lrcfetch/enrichers/file_name.py new file mode 100644 index 0000000..150cebd --- /dev/null +++ b/lrcfetch/enrichers/file_name.py @@ -0,0 +1,83 @@ +""" +Author: Uyanide pywang0608@foxmail.com +Date: 2026-03-31 06:08:44 +Description: Enricher that parses metadata from the audio file path +""" + +import re +from typing import Optional +from loguru import logger + +from .base import BaseEnricher +from ..models import TrackMeta +from ..lrc import get_audio_path + + +# Common track-number prefixes: "01 - ", "01. ", "1 - ", etc. +_TRACK_NUM_RE = re.compile(r"^\d{1,3}[\s.\-]+") + + +class FileNameEnricher(BaseEnricher): + """Derive artist / title from the file path when tags are unavailable. + + Heuristics (applied to the stem of the filename): + - "Artist - Title" → artist, title + - "01 - Title" → title only (leading track number stripped) + - "Title" → title only + + If artist is still missing after parsing the filename, the parent + directory name is used as a guess (common layout: ``Artist/Album/track``). + """ + + @property + def name(self) -> str: + return "file-name" + + def enrich(self, track: TrackMeta) -> Optional[dict]: + if not track.is_local or not track.url: + return None + + audio_path = get_audio_path(track.url, ensure_exists=False) + if not audio_path: + return None + + updates: dict = {} + stem = audio_path.stem + + # Try "Artist - Title" split + if " - " in stem: + left, right = stem.split(" - ", 1) + left = _TRACK_NUM_RE.sub("", left).strip() + right = right.strip() + + if left and right: + # Both sides non-empty after stripping track number + if not track.artist: + updates["artist"] = left + if not track.title: + updates["title"] = right + elif right: + # Left was only a track number → right is the title + if not track.title: + updates["title"] = right + else: + # No separator: strip track number, remainder is title + title_guess = _TRACK_NUM_RE.sub("", stem).strip() + if title_guess and not track.title: + updates["title"] = title_guess + + # Use parent directory as artist fallback + # Typical layout: /Music/Artist/Album/01 - Track.flac + if not track.artist and "artist" not in updates: + parents = audio_path.parents + if len(parents) >= 2: + album_dir = parents[0].name + artist_dir = parents[1].name + if artist_dir and artist_dir not in (".", "/"): + updates["artist"] = artist_dir + if not track.album and album_dir and album_dir != artist_dir: + updates["album"] = album_dir + + if updates: + logger.debug(f"FileName: enriched fields: {list(updates.keys())}") + return updates or None diff --git a/lrcfetch/fetchers/__init__.py b/lrcfetch/fetchers/__init__.py index e69de29..e0236aa 100644 --- a/lrcfetch/fetchers/__init__.py +++ b/lrcfetch/fetchers/__init__.py @@ -0,0 +1,54 @@ +""" +Author: Uyanide pywang0608@foxmail.com +Date: 2026-03-25 02:33:26 +Description: Fetcher pipeline — registry and types +""" + +from typing import Literal + +from .base import BaseFetcher +from .local import LocalFetcher +from .cache_search import CacheSearchFetcher +from .spotify import SpotifyFetcher +from .lrclib import LrclibFetcher +from .lrclib_search import LrclibSearchFetcher +from .netease import NeteaseFetcher +from .qqmusic import QQMusicFetcher +from ..cache import CacheEngine + +METHODS = ( + "local", + "cache-search", + "spotify", + "lrclib", + "lrclib-search", + "netease", + "qqmusic", +) + +FetcherMethodType = Literal[ + "local", + "cache-search", + "spotify", + "lrclib", + "lrclib-search", + "netease", + "qqmusic", +] + + +def create_fetchers(cache: CacheEngine) -> dict[str, BaseFetcher]: + """Instantiate all fetchers. Returns a dict keyed by source name.""" + fetchers: dict[str, BaseFetcher] = { + "local": LocalFetcher(), + "cache-search": CacheSearchFetcher(cache), + "spotify": SpotifyFetcher(), + "lrclib": LrclibFetcher(), + "lrclib-search": LrclibSearchFetcher(), + "netease": NeteaseFetcher(), + "qqmusic": QQMusicFetcher(), + } + assert set(fetchers) == set(METHODS), ( + f"METHODS and fetchers out of sync: {set(METHODS) ^ set(fetchers)}" + ) + return fetchers diff --git a/lrcfetch/fetchers/base.py b/lrcfetch/fetchers/base.py index afa30ed..28b8f98 100644 --- a/lrcfetch/fetchers/base.py +++ b/lrcfetch/fetchers/base.py @@ -17,7 +17,14 @@ class BaseFetcher(ABC): """Name of the fetcher source.""" pass + @property + def self_cached(self) -> bool: + """True if this fetcher manages its own cache (skip per-source cache check).""" + return False + @abstractmethod - def fetch(self, track: TrackMeta) -> Optional[LyricResult]: + def fetch( + self, track: TrackMeta, bypass_cache: bool = False + ) -> Optional[LyricResult]: """Fetch lyrics for the given track. Returns None if unable to fetch.""" pass diff --git a/lrcfetch/fetchers/cache_search.py b/lrcfetch/fetchers/cache_search.py index 7d784f8..7246bbc 100644 --- a/lrcfetch/fetchers/cache_search.py +++ b/lrcfetch/fetchers/cache_search.py @@ -26,7 +26,17 @@ class CacheSearchFetcher(BaseFetcher): def source_name(self) -> str: return "cache-search" - def fetch(self, track: TrackMeta) -> Optional[LyricResult]: + @property + def self_cached(self) -> bool: + return True + + def fetch( + self, track: TrackMeta, bypass_cache: bool = False + ) -> Optional[LyricResult]: + if bypass_cache: + logger.debug("Cache-search: bypassed by caller") + return None + if not track.title: logger.debug("Cache-search: skipped — no title") return None diff --git a/lrcfetch/fetchers/local.py b/lrcfetch/fetchers/local.py index f3569b8..e1e46e7 100644 --- a/lrcfetch/fetchers/local.py +++ b/lrcfetch/fetchers/local.py @@ -25,7 +25,9 @@ class LocalFetcher(BaseFetcher): def source_name(self) -> str: return "local" - def fetch(self, track: TrackMeta) -> Optional[LyricResult]: + def fetch( + self, track: TrackMeta, bypass_cache: bool = False + ) -> Optional[LyricResult]: """Attempt to read lyrics from local filesystem.""" if not track.is_local or not track.url: return None diff --git a/lrcfetch/fetchers/lrclib.py b/lrcfetch/fetchers/lrclib.py index 2325dfa..e5e4c3f 100644 --- a/lrcfetch/fetchers/lrclib.py +++ b/lrcfetch/fetchers/lrclib.py @@ -30,7 +30,9 @@ class LrclibFetcher(BaseFetcher): def source_name(self) -> str: return "lrclib" - def fetch(self, track: TrackMeta) -> Optional[LyricResult]: + def fetch( + self, track: TrackMeta, bypass_cache: bool = False + ) -> Optional[LyricResult]: """Fetch lyrics from LRCLIB. Requires complete metadata.""" if not track.is_complete: logger.debug("LRCLIB: skipped — incomplete metadata") diff --git a/lrcfetch/fetchers/lrclib_search.py b/lrcfetch/fetchers/lrclib_search.py index 37c6fca..83e2c30 100644 --- a/lrcfetch/fetchers/lrclib_search.py +++ b/lrcfetch/fetchers/lrclib_search.py @@ -32,7 +32,9 @@ class LrclibSearchFetcher(BaseFetcher): def source_name(self) -> str: return "lrclib-search" - def fetch(self, track: TrackMeta) -> Optional[LyricResult]: + def fetch( + self, track: TrackMeta, bypass_cache: bool = False + ) -> Optional[LyricResult]: """Search LRCLIB for lyrics. Requires at least a title.""" if not track.title: logger.debug("LRCLIB-search: skipped — no title") diff --git a/lrcfetch/fetchers/netease.py b/lrcfetch/fetchers/netease.py index b8628b2..9d1ebba 100644 --- a/lrcfetch/fetchers/netease.py +++ b/lrcfetch/fetchers/netease.py @@ -194,7 +194,9 @@ class NeteaseFetcher(BaseFetcher): logger.error(f"Netease: lyric fetch failed for song_id={song_id}: {e}") return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR) - def fetch(self, track: TrackMeta) -> Optional[LyricResult]: + def fetch( + self, track: TrackMeta, bypass_cache: bool = False + ) -> Optional[LyricResult]: """Search for the track and fetch its lyrics.""" query = f"{track.artist or ''} {track.title or ''}".strip() if not query: diff --git a/lrcfetch/fetchers/qqmusic.py b/lrcfetch/fetchers/qqmusic.py index e6cd63a..d64c01e 100644 --- a/lrcfetch/fetchers/qqmusic.py +++ b/lrcfetch/fetchers/qqmusic.py @@ -155,7 +155,9 @@ class QQMusicFetcher(BaseFetcher): logger.error(f"QQMusic: lyric fetch failed for mid={mid}: {e}") return LyricResult(status=CacheStatus.NETWORK_ERROR, ttl=TTL_NETWORK_ERROR) - def fetch(self, track: TrackMeta) -> Optional[LyricResult]: + def fetch( + self, track: TrackMeta, bypass_cache: bool = False + ) -> Optional[LyricResult]: """Search for the track and fetch its lyrics.""" if not QQ_MUSIC_API_URL: logger.debug("QQMusic: skipped — QQ_MUSIC_API_URL not configured") diff --git a/lrcfetch/fetchers/spotify.py b/lrcfetch/fetchers/spotify.py index 7ca90e7..7bc8e12 100644 --- a/lrcfetch/fetchers/spotify.py +++ b/lrcfetch/fetchers/spotify.py @@ -274,7 +274,9 @@ class SpotifyFetcher(BaseFetcher): continue return False - def fetch(self, track: TrackMeta) -> Optional[LyricResult]: + def fetch( + self, track: TrackMeta, bypass_cache: bool = False + ) -> Optional[LyricResult]: """Fetch lyrics for a Spotify track by its track ID.""" if not track.trackid: logger.debug("Spotify: skipped — no trackid in metadata") diff --git a/pyproject.toml b/pyproject.toml index a4fde28..001acdb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "lrcfetch" -version = "0.1.4" +version = "0.1.5" description = "Fetch line-synced lyrics for your music player." readme = "README.md" requires-python = ">=3.13" diff --git a/uv.lock b/uv.lock index eb0cfed..7fa21c8 100644 --- a/uv.lock +++ b/uv.lock @@ -153,7 +153,7 @@ wheels = [ [[package]] name = "lrcfetch" -version = "0.1.4" +version = "0.1.5" source = { editable = "." } dependencies = [ { name = "cyclopts" },