chore: switch to src layout
This commit is contained in:
@@ -0,0 +1,58 @@
|
||||
"""
|
||||
Author: Uyanide pywang0608@foxmail.com
|
||||
Date: 2026-03-31 06:09:11
|
||||
Description: Metadata enrichment pipeline
|
||||
"""
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from .base import BaseEnricher
|
||||
from .audio_tag import AudioTagEnricher
|
||||
from .file_name import FileNameEnricher
|
||||
from .musixmatch import MusixmatchSpotifyEnricher
|
||||
from ..authenticators import BaseAuthenticator, MusixmatchAuthenticator
|
||||
from ..models import TrackMeta
|
||||
|
||||
# Enrichers run in order; earlier ones have higher priority.
|
||||
# There are only a few of them, so we can just call them sequentially without worrying about async concurrency or batching.
|
||||
|
||||
|
||||
def create_enrichers(
    authenticators: dict[str, BaseAuthenticator],
) -> list[BaseEnricher]:
    """Build the enricher chain, in the order the enrichers should run.

    Args:
        authenticators: Authenticators keyed by provider name. The
            ``"musixmatch"`` entry is required and must be a
            ``MusixmatchAuthenticator``.

    Returns:
        A new list holding the audio-tag, file-name and Musixmatch
        enrichers, in that order.

    Raises:
        KeyError: If ``authenticators`` has no ``"musixmatch"`` entry.
        AssertionError: If that entry is not a ``MusixmatchAuthenticator``.
    """
    musixmatch_auth = authenticators["musixmatch"]
    # Narrows the declared BaseAuthenticator to the concrete type that the
    # Musixmatch enricher's constructor is annotated with.
    assert isinstance(musixmatch_auth, MusixmatchAuthenticator)

    chain: list[BaseEnricher] = [AudioTagEnricher(), FileNameEnricher()]
    chain.append(MusixmatchSpotifyEnricher(musixmatch_auth))
    return chain
|
||||
|
||||
|
||||
async def enrich_track(track: TrackMeta, enrichers: list[BaseEnricher]) -> TrackMeta:
    """Fill in missing fields of ``track`` by running ``enrichers`` in order.

    Enrichers are awaited one after another, so each one sees whatever the
    previous ones already filled in. An attribute is only written when its
    current value is ``None``; values that are already present are never
    overwritten.

    Args:
        track: The track to complete. It is modified in place.
        enrichers: Enrichers to run, in order.

    Returns:
        The same ``track`` object that was passed in.
    """
    for enricher in enrichers:
        try:
            # An enricher with nothing left to contribute is not invoked.
            has_gap = any(
                getattr(track, name, None) is None for name in enricher.provides
            )
            if not has_gap:
                continue

            result = await enricher.enrich(track)
        except Exception as e:
            # A failing enricher is reported and skipped; the rest still run.
            logger.warning(f"Enricher {enricher.name} failed: {e}")
            continue

        if not result:
            continue

        # Decide what to write first, then write it.
        pending = [
            (key, value)
            for key, value in result.items()
            if getattr(track, key, None) is None
        ]
        for key, value in pending:
            setattr(track, key, value)

    return track
|
||||
@@ -0,0 +1,82 @@
|
||||
"""
|
||||
Author: Uyanide pywang0608@foxmail.com
|
||||
Date: 2026-03-31 06:11:27
|
||||
Description: Enricher that reads metadata from audio file tags.
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
from loguru import logger
|
||||
from mutagen._file import File, FileType
|
||||
|
||||
from .base import BaseEnricher
|
||||
from ..models import TrackMeta
|
||||
from ..lrc import get_audio_path
|
||||
|
||||
|
||||
class AudioTagEnricher(BaseEnricher):
    """Enricher that reads title, artist, album and duration from audio file tags."""

    @property
    def name(self) -> str:
        """Identifier of this enricher."""
        return "audio-tag"

    @property
    def provides(self) -> set[str]:
        """Track fields this enricher may supply."""
        return {"title", "artist", "album", "length"}

    async def enrich(self, track: TrackMeta) -> Optional[dict]:
        """Read tags of the track's audio file and return values for empty fields.

        Returns None when the track is not local, has no URL, no audio path
        can be resolved for it, the file cannot be opened, or nothing new
        was found. Otherwise returns a dict with any of ``title``,
        ``artist``, ``album`` and ``length`` (milliseconds).
        """
        if not (track.is_local and track.url):
            return None

        audio_path = get_audio_path(track.url, ensure_exists=True)
        if not audio_path:
            return None

        try:
            audio = File(audio_path)
        except Exception as e:
            logger.debug(f"AudioTag: failed to read {audio_path}: {e}")
            return None
        if audio is None:
            return None

        found: dict = {}

        # For each field, the tag names to try: vorbis comment, ID3, MP4.
        tag_names = (
            ("title", ("title", "TIT2", "\xa9nam")),
            ("artist", ("artist", "TPE1", "\xa9ART")),
            ("album", ("album", "TALB", "\xa9alb")),
        )
        for field, candidates in tag_names:
            value = _first_tag(audio, *candidates)
            if value and not getattr(track, field):
                found[field] = value

        if not track.length and audio.info and hasattr(audio.info, "length"):
            millis = int(audio.info.length * 1000)
            if millis > 0:
                found["length"] = millis

        if not found:
            return None
        logger.debug(f"AudioTag: enriched fields: {list(found.keys())}")
        return found
|
||||
|
||||
|
||||
def _first_tag(audio: FileType, *keys: str) -> Optional[str]:
|
||||
"""Return the first non-empty string value found among the given tag keys."""
|
||||
if not audio.tags:
|
||||
return None
|
||||
for key in keys:
|
||||
val = audio.tags.get(key)
|
||||
if val is None:
|
||||
continue
|
||||
# mutagen returns lists for vorbis, single values for ID3
|
||||
if isinstance(val, list):
|
||||
val = val[0] if val else None
|
||||
if val:
|
||||
return str(val).strip()
|
||||
return None
|
||||
@@ -0,0 +1,35 @@
|
||||
"""
|
||||
Author: Uyanide pywang0608@foxmail.com
|
||||
Date: 2026-03-31 06:08:16
|
||||
Description: Base class for metadata enrichers.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Optional
|
||||
|
||||
from ..models import TrackMeta
|
||||
|
||||
|
||||
class BaseEnricher(ABC):
    """Interface for components that try to supply missing TrackMeta fields.

    An enricher looks at a track and reports, as a mapping of field name to
    value, whatever it is able to provide. Only fields that are currently
    ``None`` on the track will actually be applied.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Identifier of this enricher."""

    @property
    @abstractmethod
    def provides(self) -> set[str]:
        """Names of the track fields this enricher may supply."""

    @abstractmethod
    async def enrich(self, track: TrackMeta) -> Optional[dict]:
        """Compute ``{field_name: value}`` for the fields this enricher can fill.

        Return None or an empty dict if nothing can be contributed.
        """
|
||||
@@ -0,0 +1,100 @@
|
||||
"""
|
||||
Author: Uyanide pywang0608@foxmail.com
|
||||
Date: 2026-03-31 06:08:44
|
||||
Description: Enricher that parses metadata from the audio file path.
|
||||
"""
|
||||
|
||||
import re
|
||||
from typing import Optional
|
||||
from loguru import logger
|
||||
|
||||
from .base import BaseEnricher
|
||||
from ..models import TrackMeta
|
||||
from ..lrc import get_audio_path
|
||||
|
||||
|
||||
# Common track-number prefixes: "01 - ", "01. ", "1 - ", etc.
# Matches one to three digits at the very start of the string followed by at
# least one whitespace, "." or "-" character. A bare number with nothing
# after it (e.g. "01") does not match.
_TRACK_NUM_RE = re.compile(r"^\d{1,3}[\s.\-]+")
|
||||
|
||||
|
||||
class FileNameEnricher(BaseEnricher):
    """Derive artist / title / album from the file path when tags are unavailable.

    Heuristics, applied to the stem of the filename after a leading track
    number ("01 - ", "01. ", "1-", ...) has been removed:

    - "Artist - Title" → artist, title
    - "Artist-Title"   → artist, title
    - "Title"          → title only

    Consequently "01 - Title" yields a title only, and
    "01 - Artist - Title" yields both artist and title.

    If the album is still missing, the name of the directory that contains
    the file is used as a guess.

    Only fields that are empty on the track are ever returned.
    """

    @property
    def name(self) -> str:
        """Identifier of this enricher."""
        return "file-name"

    @property
    def provides(self) -> set[str]:
        """Track fields this enricher may supply."""
        return {"artist", "title", "album"}

    @staticmethod
    def _split_stem(stem: str) -> tuple[Optional[str], Optional[str]]:
        """Split a filename stem into ``(artist, title)`` guesses.

        Either element is None when it cannot be determined.
        """
        # Strip the track number from the whole stem *before* splitting.
        # Stripping only the left half of the split misses "01 - Title":
        # there the left half is the bare "01", which has no trailing
        # separator for _TRACK_NUM_RE to match, so "01" would be reported
        # as the artist.
        cleaned = _TRACK_NUM_RE.sub("", stem).strip()

        # Prefer the spaced separator; fall back to a bare hyphen.
        for separator in (" - ", "-"):
            left, found, right = cleaned.partition(separator)
            if found:
                break
        else:
            # No separator at all: whatever is left is the title.
            return None, cleaned or None

        left, right = left.strip(), right.strip()
        if left and right:
            return left, right
        # Nothing before the separator: the right side alone is the title.
        # Nothing after it: there is no usable guess.
        return None, right or None

    async def enrich(self, track: TrackMeta) -> Optional[dict]:
        """Return artist/title/album guesses for the fields ``track`` lacks.

        Returns None when the track is not local, has no URL, no audio path
        can be resolved for it, or no guess could be made.
        """
        if not track.is_local or not track.url:
            return None

        audio_path = get_audio_path(track.url, ensure_exists=False)
        if not audio_path:
            return None

        updates: dict = {}

        artist, title = self._split_stem(audio_path.stem)
        if artist and not track.artist:
            updates["artist"] = artist
        if title and not track.title:
            updates["title"] = title

        # Use parent directory as album fallback
        if not track.album:
            parents = audio_path.parents
            album_dir = parents[0].name if len(parents) >= 1 else ""
            if album_dir and album_dir not in (".", "/"):
                updates["album"] = album_dir

        if updates:
            logger.debug(f"FileName: enriched fields: {list(updates.keys())}")
        return updates or None
|
||||
@@ -0,0 +1,72 @@
|
||||
"""
|
||||
Author: Uyanide pywang0608@foxmail.com
|
||||
Date: 2026-04-05 02:13:49
|
||||
Description: Musixmatch metadata enricher (matcher.track.get by Spotify track ID).
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from .base import BaseEnricher
|
||||
from ..authenticators.musixmatch import MusixmatchAuthenticator
|
||||
from ..models import TrackMeta
|
||||
|
||||
# Endpoint of the Musixmatch ``matcher.track.get`` call, which this module
# queries with a Spotify track ID to look up track metadata.
_MUSIXMATCH_TRACK_MATCH_URL = (
    "https://apic-desktop.musixmatch.com/ws/1.1/matcher.track.get"
)
|
||||
|
||||
|
||||
class MusixmatchSpotifyEnricher(BaseEnricher):
    """Fill title, artist, album, and length from Musixmatch using Spotify track ID."""

    def __init__(self, auth: MusixmatchAuthenticator) -> None:
        """Store the authenticator used to issue Musixmatch requests."""
        self.auth = auth

    @property
    def name(self) -> str:
        """Identifier of this enricher."""
        return "musixmatch"

    @property
    def provides(self) -> set[str]:
        """Track fields this enricher may supply."""
        return {"title", "artist", "album", "length"}

    @staticmethod
    def _track_payload(data: object) -> Optional[dict]:
        """Return ``data["message"]["body"]["track"]`` if it is a dict, else None.

        Every level is type-checked, so a response whose ``message`` or
        ``body`` is missing, null, or of an unexpected type (or that is not
        a JSON object at all) yields None instead of raising.
        """
        node = data
        for key in ("message", "body", "track"):
            if not isinstance(node, dict):
                return None
            node = node.get(key)
        return node if isinstance(node, dict) else None

    async def enrich(self, track: TrackMeta) -> Optional[dict]:
        """Look the track up by its ``trackid`` and return the fields found.

        Returns None when the track has no ``trackid``, the request fails
        or returns nothing, the response carries no track object, or the
        track object has none of the expected fields.
        """
        if not track.trackid:
            return None

        logger.debug(f"Musixmatch enricher: looking up trackid={track.trackid}")

        try:
            data = await self.auth.get_json(
                _MUSIXMATCH_TRACK_MATCH_URL,
                {"track_spotify_id": track.trackid},
            )
        except Exception as e:
            logger.warning(f"Musixmatch enricher: request failed: {e}")
            return None

        if data is None:
            return None

        t = self._track_payload(data)
        if t is None:
            logger.debug(
                f"Musixmatch enricher: no track data for trackid={track.trackid}"
            )
            return None

        updates: dict = {}

        text_fields = (
            ("title", "track_name"),
            ("artist", "artist_name"),
            ("album", "album_name"),
        )
        for field, key in text_fields:
            value = t.get(key)
            if isinstance(value, str) and value:
                updates[field] = value

        length = t.get("track_length")
        # bool is a subclass of int; a JSON true must not become a length.
        if isinstance(length, int) and not isinstance(length, bool) and length > 0:
            # NOTE(review): track_length is presumably in seconds, hence the
            # scaling by 1000 — confirm against the API.
            updates["length"] = length * 1000

        if updates:
            logger.debug(f"Musixmatch enricher: filled {list(updates.keys())}")
        return updates or None
|
||||
Reference in New Issue
Block a user