chore: switch to src layout

2026-04-06 09:15:07 +02:00
parent c5abbff14c
commit 69b7f5c60c
35 changed files with 4 additions and 7 deletions
@@ -0,0 +1,58 @@
+"""
+Author: Uyanide pywang0608@foxmail.com
+Date: 2026-03-31 06:09:11
+Description: Metadata enrichment pipeline
+"""
+
+from loguru import logger
+
+from .base import BaseEnricher
+from .audio_tag import AudioTagEnricher
+from .file_name import FileNameEnricher
+from .musixmatch import MusixmatchSpotifyEnricher
+from ..authenticators import BaseAuthenticator, MusixmatchAuthenticator
+from ..models import TrackMeta
+
+# Enrichers run in order; earlier ones have higher priority.
+# There are only a few of them, so we can just call them sequentially without worrying about async concurrency or batching.
+
+
+def create_enrichers(
+    authenticators: dict[str, BaseAuthenticator],
+) -> list[BaseEnricher]:
+    """Instantiate all enrichers in the order they should run.
+
+    Args:
+        authenticators: Authenticators keyed by provider name.  Must contain
+            a ``"musixmatch"`` entry holding a ``MusixmatchAuthenticator``.
+
+    Returns:
+        The enrichers, highest priority first.
+
+    Raises:
+        KeyError: If no ``"musixmatch"`` authenticator is present.
+        TypeError: If the ``"musixmatch"`` entry is not a
+            ``MusixmatchAuthenticator``.
+    """
+    mxm_auth = authenticators["musixmatch"]
+    # Explicit check instead of ``assert``: asserts are stripped under
+    # ``python -O``, which would let a wrong authenticator slip through
+    # and only fail later, inside the enricher.
+    if not isinstance(mxm_auth, MusixmatchAuthenticator):
+        raise TypeError(
+            "authenticators['musixmatch'] must be a MusixmatchAuthenticator, "
+            f"got {type(mxm_auth).__name__}"
+        )
+    return [
+        AudioTagEnricher(),
+        FileNameEnricher(),
+        MusixmatchSpotifyEnricher(mxm_auth),
+    ]
+
+
+async def enrich_track(track: TrackMeta, enrichers: list[BaseEnricher]) -> TrackMeta:
+    """Fill the track's missing fields by running each enricher in turn.
+
+    Enrichers run in the given order and each one sees the values written
+    by the ones before it.  An enricher is skipped when every field it
+    provides is already set, and a value is only written to a field that
+    is still ``None``.  A failing enricher is logged and skipped.
+
+    The track is modified in place and also returned.
+    """
+    for enricher in enrichers:
+        try:
+            # Nothing to do when none of the provided fields is missing.
+            has_gap = any(
+                getattr(track, name, None) is None for name in enricher.provides
+            )
+            if not has_gap:
+                continue
+
+            result = await enricher.enrich(track)
+        except Exception as e:
+            logger.warning(f"Enricher {enricher.name} failed: {e}")
+            continue
+
+        if not result:
+            continue
+
+        # Decide what to write before touching the track, then apply it.
+        pending = [
+            (key, value)
+            for key, value in result.items()
+            if getattr(track, key, None) is None
+        ]
+        for key, value in pending:
+            setattr(track, key, value)
+    return track
@@ -0,0 +1,82 @@
+"""
+Author: Uyanide pywang0608@foxmail.com
+Date: 2026-03-31 06:11:27
+Description: Enricher that reads metadata from audio file tags.
+"""
+
+from typing import Optional
+from loguru import logger
+from mutagen._file import File, FileType
+
+from .base import BaseEnricher
+from ..models import TrackMeta
+from ..lrc import get_audio_path
+
+
+class AudioTagEnricher(BaseEnricher):
+    """Enricher that reads title, artist, album and duration from audio file tags."""
+
+    @property
+    def name(self) -> str:
+        return "audio-tag"
+
+    @property
+    def provides(self) -> set[str]:
+        return {"title", "artist", "album", "length"}
+
+    async def enrich(self, track: TrackMeta) -> Optional[dict]:
+        """Return tag-derived values for the fields the track is missing.
+
+        Returns ``None`` when the track is not local, the audio file cannot
+        be resolved or opened, or nothing new was found.
+        """
+        # Only a local track with a URL can be mapped to a file.
+        if not (track.is_local and track.url):
+            return None
+
+        audio_path = get_audio_path(track.url, ensure_exists=True)
+        if not audio_path:
+            return None
+
+        try:
+            audio = File(audio_path)
+        except Exception as e:
+            logger.debug(f"AudioTag: failed to read {audio_path}: {e}")
+            return None
+
+        if audio is None:
+            return None
+
+        # Field name -> candidate tag keys (vorbis comments, ID3, MP4).
+        tag_keys = (
+            ("title", ("title", "TIT2", "\xa9nam")),
+            ("artist", ("artist", "TPE1", "\xa9ART")),
+            ("album", ("album", "TALB", "\xa9alb")),
+        )
+
+        found: dict = {}
+        for field, keys in tag_keys:
+            value = _first_tag(audio, *keys)
+            if value and not getattr(track, field):
+                found[field] = value
+
+        # The stream length is scaled by 1000 before being stored.
+        if not track.length and audio.info and hasattr(audio.info, "length"):
+            millis = int(audio.info.length * 1000)
+            if millis > 0:
+                found["length"] = millis
+
+        if not found:
+            return None
+        logger.debug(f"AudioTag: enriched fields: {list(found.keys())}")
+        return found
+
+
+def _first_tag(audio: FileType, *keys: str) -> Optional[str]:
+    """Return the first non-empty string value found among the given tag keys.
+
+    Args:
+        audio: Opened audio file whose ``tags`` mapping is searched.
+        *keys: Tag names to try, in priority order.
+
+    Returns:
+        The stripped text of the first key that yields non-blank text, or
+        ``None`` when the file has no tags or no key produced usable text.
+    """
+    if not audio.tags:
+        return None
+    for key in keys:
+        val = audio.tags.get(key)
+        if val is None:
+            continue
+        # mutagen returns lists for vorbis, single values for ID3
+        if isinstance(val, list):
+            val = val[0] if val else None
+        if not val:
+            continue
+        # A multi-valued ID3 text frame stringifies as NUL-separated text;
+        # keep only the first value, mirroring the list handling above.
+        text = str(val).partition("\x00")[0].strip()
+        # Whitespace-only values count as empty so later keys still get a try.
+        if text:
+            return text
+    return None
@@ -0,0 +1,35 @@
+"""
+Author: Uyanide pywang0608@foxmail.com
+Date: 2026-03-31 06:08:16
+Description: Base class for metadata enrichers.
+"""
+
+from abc import ABC, abstractmethod
+from typing import Optional
+
+from ..models import TrackMeta
+
+
+class BaseEnricher(ABC):
+    """Attempts to fill missing fields on a TrackMeta.
+
+    Each enricher inspects the track, and returns a dict of field names
+    to values for any fields it can provide.  Only fields that are
+    currently ``None`` on the track will actually be applied.
+
+    Subclasses must implement ``name``, ``provides`` and ``enrich``.
+    """
+
+    # Short identifier for this enricher.
+    @property
+    @abstractmethod
+    def name(self) -> str: ...
+
+    # Names of the TrackMeta fields this enricher is able to supply.
+    @property
+    @abstractmethod
+    def provides(self) -> set[str]: ...
+
+    @abstractmethod
+    async def enrich(self, track: TrackMeta) -> Optional[dict]:
+        """Return a dict of {field_name: value} for fields this enricher can fill.
+
+        Return None or an empty dict if nothing can be contributed.
+        """
+        ...
@@ -0,0 +1,100 @@
+"""
+Author: Uyanide pywang0608@foxmail.com
+Date: 2026-03-31 06:08:44
+Description: Enricher that parses metadata from the audio file path.
+"""
+
+import re
+from typing import Optional
+from loguru import logger
+
+from .base import BaseEnricher
+from ..models import TrackMeta
+from ..lrc import get_audio_path
+
+
+# Common track-number prefixes: "01 - ", "01. ", "1 - ", etc.
+# Matches 1-3 digits at the start of the string followed by at least one
+# whitespace, "." or "-" character.  A bare number with no trailing
+# separator (e.g. "01") does not match.
+_TRACK_NUM_RE = re.compile(r"^\d{1,3}[\s.\-]+")
+
+
+class FileNameEnricher(BaseEnricher):
+    """Derive title / artist / album guesses from the file path when tags are unavailable.
+
+    Heuristics (applied to the stem of the filename, after a leading track
+    number such as "01 - ", "01. " or "1-" has been stripped):
+      - "Artist - Title"  →  artist, title
+      - "Artist-Title"    →  artist, title (only when the stem has no " - ")
+      - "01 - Title"      →  title only (leading track number stripped)
+      - "Title"           →  title only
+
+    If album is still missing, the parent directory name is used as a
+    guess (common layout: ``Artist/Album/track``).
+    """
+
+    @property
+    def name(self) -> str:
+        return "file-name"
+
+    @property
+    def provides(self) -> set[str]:
+        return {"artist", "title", "album"}
+
+    async def enrich(self, track: TrackMeta) -> Optional[dict]:
+        """Return filename-derived guesses for the fields the track is missing.
+
+        Returns ``None`` when the track is not local, no audio path can be
+        resolved, or nothing could be derived.
+        """
+        if not track.is_local or not track.url:
+            return None
+
+        # Only the path text is inspected, so the file need not exist.
+        audio_path = get_audio_path(track.url, ensure_exists=False)
+        if not audio_path:
+            return None
+
+        updates: dict = {}
+        artist, title = self._split_stem(audio_path.stem)
+        if artist and not track.artist:
+            updates["artist"] = artist
+        if title and not track.title:
+            updates["title"] = title
+
+        # Use parent directory as album fallback
+        if not track.album:
+            parents = audio_path.parents
+            album_dir = parents[0].name if len(parents) >= 1 else ""
+            if album_dir and album_dir not in (".", "/"):
+                updates["album"] = album_dir
+
+        if updates:
+            logger.debug(f"FileName: enriched fields: {list(updates.keys())}")
+        return updates or None
+
+    @staticmethod
+    def _split_stem(stem: str) -> tuple[Optional[str], Optional[str]]:
+        """Split a filename stem into ``(artist, title)`` guesses.
+
+        Either element is ``None`` when it cannot be derived.
+        """
+        # Pick the separator from the untouched stem so that a hyphenated
+        # title behind a track number ("01 - Spider-Man") is not split on
+        # its inner "-".
+        if " - " in stem:
+            sep: Optional[str] = " - "
+        elif "-" in stem:
+            sep = "-"
+        else:
+            sep = None
+
+        # Strip the track number from the whole stem, not from the left half
+        # after splitting: in "01 - Title" the left half is just "01", which
+        # the pattern (digits + separator) would not match, so the track
+        # number would be mistaken for the artist.
+        rest = _TRACK_NUM_RE.sub("", stem)
+
+        if sep is not None and sep in rest:
+            left, right = (part.strip() for part in rest.split(sep, 1))
+            if not right:
+                # Nothing after the separator: no usable title or artist.
+                return None, None
+            return (left or None), right
+
+        # No separator left: the remainder is the title.
+        return None, (rest.strip() or None)
@@ -0,0 +1,72 @@
+"""
+Author: Uyanide pywang0608@foxmail.com
+Date: 2026-04-05 02:13:49
+Description: Musixmatch metadata enricher (matcher.track.get by Spotify track ID).
+"""
+
+from typing import Optional
+
+from loguru import logger
+
+from .base import BaseEnricher
+from ..authenticators.musixmatch import MusixmatchAuthenticator
+from ..models import TrackMeta
+
+# Musixmatch ``matcher.track.get`` endpoint, queried by the enricher below
+# with a ``track_spotify_id`` parameter.
+_MUSIXMATCH_TRACK_MATCH_URL = (
+    "https://apic-desktop.musixmatch.com/ws/1.1/matcher.track.get"
+)
+
+
+class MusixmatchSpotifyEnricher(BaseEnricher):
+    """Metadata enricher backed by Musixmatch, looked up by Spotify track ID."""
+
+    def __init__(self, auth: MusixmatchAuthenticator) -> None:
+        self.auth = auth
+
+    @property
+    def name(self) -> str:
+        return "musixmatch"
+
+    @property
+    def provides(self) -> set[str]:
+        return {"title", "artist", "album", "length"}
+
+    async def enrich(self, track: TrackMeta) -> Optional[dict]:
+        """Look the track up on Musixmatch and return the fields it reports.
+
+        Returns ``None`` when the track has no ``trackid``, the request
+        fails, the response carries no track object, or no usable field
+        was present.
+        """
+        if not track.trackid:
+            return None
+
+        logger.debug(f"Musixmatch enricher: looking up trackid={track.trackid}")
+
+        query = {"track_spotify_id": track.trackid}
+        try:
+            data = await self.auth.get_json(_MUSIXMATCH_TRACK_MATCH_URL, query)
+        except Exception as e:
+            logger.warning(f"Musixmatch enricher: request failed: {e}")
+            return None
+
+        if data is None:
+            return None
+
+        # The track object is read from message -> body -> track; a body
+        # that is not a dict is treated as "no track".
+        body = data.get("message", {}).get("body")
+        info = body.get("track") if isinstance(body, dict) else None
+        if not isinstance(info, dict):
+            logger.debug(
+                f"Musixmatch enricher: no track data for trackid={track.trackid}"
+            )
+            return None
+
+        # Response key -> TrackMeta field, for the plain string fields.
+        text_fields = (
+            ("track_name", "title"),
+            ("artist_name", "artist"),
+            ("album_name", "album"),
+        )
+        found: dict = {}
+        for source_key, field in text_fields:
+            value = info.get(source_key)
+            if isinstance(value, str) and value:
+                found[field] = value
+
+        # track_length is scaled by 1000 before being stored as length
+        # (presumably seconds -> milliseconds; confirm against the API).
+        duration = info.get("track_length")
+        if isinstance(duration, int) and duration > 0:
+            found["length"] = duration * 1000
+
+        if not found:
+            return None
+        logger.debug(f"Musixmatch enricher: filled {list(found.keys())}")
+        return found