chore: switch to src layout

This commit is contained in:
2026-04-06 09:15:07 +02:00
parent c5abbff14c
commit 69b7f5c60c
35 changed files with 4 additions and 7 deletions
+58
View File
@@ -0,0 +1,58 @@
"""
Author: Uyanide pywang0608@foxmail.com
Date: 2026-03-31 06:09:11
Description: Metadata enrichment pipeline
"""
from loguru import logger
from .base import BaseEnricher
from .audio_tag import AudioTagEnricher
from .file_name import FileNameEnricher
from .musixmatch import MusixmatchSpotifyEnricher
from ..authenticators import BaseAuthenticator, MusixmatchAuthenticator
from ..models import TrackMeta
# Enrichers run in order; earlier ones have higher priority.
# There are only a few of them, so we can just call them sequentially without worrying about async concurrency or batching.
def create_enrichers(
    authenticators: dict[str, BaseAuthenticator],
) -> list[BaseEnricher]:
    """Build the enricher pipeline in priority order.

    The ``"musixmatch"`` entry of *authenticators* is looked up (a missing
    key raises ``KeyError``) and handed to the Musixmatch enricher. The
    returned list is ordered: audio tags first, then file name, then
    Musixmatch.
    """
    musixmatch_auth = authenticators["musixmatch"]
    # Narrow the generic authenticator type for the Musixmatch enricher.
    assert isinstance(musixmatch_auth, MusixmatchAuthenticator)

    pipeline: list[BaseEnricher] = [AudioTagEnricher(), FileNameEnricher()]
    pipeline.append(MusixmatchSpotifyEnricher(musixmatch_auth))
    return pipeline
async def enrich_track(track: TrackMeta, enrichers: list[BaseEnricher]) -> TrackMeta:
    """Fill the missing fields of *track* by running *enrichers* in order.

    The track is updated in place and also returned. Every enricher sees
    the values written by the ones before it. An enricher is not called
    when none of the fields it provides is ``None`` on the track, and a
    returned value is only written to a field that is still ``None``.
    An enricher that raises is logged as a warning and skipped.
    """
    for enricher in enrichers:
        try:
            # Nothing left for this enricher to contribute -> don't call it.
            has_gap = any(
                getattr(track, field, None) is None for field in enricher.provides
            )
            if not has_gap:
                continue
            result = await enricher.enrich(track)
        except Exception as e:
            logger.warning(f"Enricher {enricher.name} failed: {e}")
            continue

        if not result:
            continue

        # Decide what to write before writing anything, so every check
        # is made against the state the enricher itself saw.
        pending = [
            (key, value)
            for key, value in result.items()
            if getattr(track, key, None) is None
        ]
        for key, value in pending:
            setattr(track, key, value)

    return track
+82
View File
@@ -0,0 +1,82 @@
"""
Author: Uyanide pywang0608@foxmail.com
Date: 2026-03-31 06:11:27
Description: Enricher that reads metadata from audio file tags.
"""
from typing import Optional
from loguru import logger
from mutagen._file import File, FileType
from .base import BaseEnricher
from ..models import TrackMeta
from ..lrc import get_audio_path
class AudioTagEnricher(BaseEnricher):
    """Read title, artist, album, and duration from a local audio file's tags."""

    # Track field -> tag keys to try, in order (vorbis comment, ID3, MP4).
    _TAG_KEYS = (
        ("title", ("title", "TIT2", "\xa9nam")),
        ("artist", ("artist", "TPE1", "\xa9ART")),
        ("album", ("album", "TALB", "\xa9alb")),
    )

    @property
    def name(self) -> str:
        """Identifier of this enricher."""
        return "audio-tag"

    @property
    def provides(self) -> set[str]:
        """Track fields this enricher may supply."""
        return {"title", "artist", "album", "length"}

    async def enrich(self, track: TrackMeta) -> Optional[dict]:
        """Return tag-derived values for the fields that are empty on *track*.

        Only local tracks with a URL are inspected. Returns ``None`` when
        the file cannot be resolved or opened, or when nothing new was
        found. ``length`` is reported in milliseconds.
        """
        if not (track.is_local and track.url):
            return None

        audio_path = get_audio_path(track.url, ensure_exists=True)
        if not audio_path:
            return None

        try:
            audio = File(audio_path)
        except Exception as e:
            logger.debug(f"AudioTag: failed to read {audio_path}: {e}")
            return None
        if audio is None:
            return None

        updates: dict = {}
        for field, keys in self._TAG_KEYS:
            value = _first_tag(audio, *keys)
            if value and not getattr(track, field):
                updates[field] = value

        if not track.length:
            info = audio.info
            if info and hasattr(info, "length"):
                # info.length is multiplied by 1000 and truncated to an int.
                length_ms = int(info.length * 1000)
                if length_ms > 0:
                    updates["length"] = length_ms

        if not updates:
            return None
        logger.debug(f"AudioTag: enriched fields: {list(updates.keys())}")
        return updates
def _first_tag(audio: FileType, *keys: str) -> Optional[str]:
"""Return the first non-empty string value found among the given tag keys."""
if not audio.tags:
return None
for key in keys:
val = audio.tags.get(key)
if val is None:
continue
# mutagen returns lists for vorbis, single values for ID3
if isinstance(val, list):
val = val[0] if val else None
if val:
return str(val).strip()
return None
+35
View File
@@ -0,0 +1,35 @@
"""
Author: Uyanide pywang0608@foxmail.com
Date: 2026-03-31 06:08:16
Description: Base class for metadata enrichers.
"""
from abc import ABC, abstractmethod
from typing import Optional
from ..models import TrackMeta
class BaseEnricher(ABC):
    """Contract for one metadata enrichment step.

    An enricher examines a TrackMeta and reports, as a mapping of field
    name to value, whatever fields it is able to supply. Only fields that
    are currently ``None`` on the track will actually be applied.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Identifier of this enricher."""

    @property
    @abstractmethod
    def provides(self) -> set[str]:
        """Names of the track fields this enricher may supply."""

    @abstractmethod
    async def enrich(self, track: TrackMeta) -> Optional[dict]:
        """Produce ``{field_name: value}`` for the fields this enricher can fill.

        Implementations return ``None`` or an empty dict when they have
        nothing to contribute.
        """
+100
View File
@@ -0,0 +1,100 @@
"""
Author: Uyanide pywang0608@foxmail.com
Date: 2026-03-31 06:08:44
Description: Enricher that parses metadata from the audio file path.
"""
import re
from typing import Optional
from loguru import logger
from .base import BaseEnricher
from ..models import TrackMeta
from ..lrc import get_audio_path
# Common track-number prefixes: "01 - ", "01. ", "1 - ", etc.
# Matches 1-3 digits at the very start of the string followed by at least
# one whitespace, dot, or hyphen character. A bare number with no trailing
# separator (e.g. "01") does not match.
_TRACK_NUM_RE = re.compile(r"^\d{1,3}[\s.\-]+")
class FileNameEnricher(BaseEnricher):
    """Derive artist / title / album from the file path when tags are unavailable.

    Heuristics (applied to the stem of the filename, after a leading track
    number such as ``"01 - "`` or ``"01."`` has been stripped):
        - "Artist - Title" → artist, title
        - "Artist-Title"   → artist, title
        - "01 - Title"     → title only
        - "Title"          → title only

    If album is missing, the name of the directory containing the file is
    used as a guess (common layout: ``Artist/Album/track``).
    """

    @property
    def name(self) -> str:
        """Identifier of this enricher."""
        return "file-name"

    @property
    def provides(self) -> set[str]:
        """Track fields this enricher may supply."""
        return {"artist", "title", "album"}

    async def enrich(self, track: TrackMeta) -> Optional[dict]:
        """Return path-derived guesses for the fields that are empty on *track*.

        Only local tracks with a URL are inspected; the file does not have
        to exist. Returns ``None`` when the path cannot be resolved or
        nothing new could be derived.
        """
        if not track.is_local or not track.url:
            return None
        audio_path = get_audio_path(track.url, ensure_exists=False)
        if not audio_path:
            return None

        updates: dict = {}

        artist, title = self._parse_stem(audio_path.stem)
        if artist and not track.artist:
            updates["artist"] = artist
        if title and not track.title:
            updates["title"] = title

        # Use parent directory as album fallback
        if not track.album:
            album_dir = audio_path.parent.name
            if album_dir and album_dir not in (".", "/"):
                updates["album"] = album_dir

        if updates:
            logger.debug(f"FileName: enriched fields: {list(updates.keys())}")
        return updates or None

    @staticmethod
    def _parse_stem(stem: str) -> tuple[Optional[str], Optional[str]]:
        """Split a filename stem into ``(artist, title)`` guesses.

        The track number is removed from the whole stem *before* looking
        for a separator. Stripping it only from the left half of the split
        misses the common ``"01 - Title"`` form: the left half is then the
        bare ``"01"``, which the prefix pattern (it requires a trailing
        separator) does not match, so ``"01"`` would be reported as artist.
        """
        cleaned = _TRACK_NUM_RE.sub("", stem.strip()).strip()

        # Prefer the spaced separator; fall back to a bare hyphen. Only the
        # first separator kind that occurs is used.
        for separator in (" - ", "-"):
            if separator not in cleaned:
                continue
            left, right = (part.strip() for part in cleaned.split(separator, 1))
            if not right:
                # Nothing usable after the separator.
                return None, None
            return (left or None), right

        # No separator: whatever remains is the title.
        return None, (cleaned or None)
+72
View File
@@ -0,0 +1,72 @@
"""
Author: Uyanide pywang0608@foxmail.com
Date: 2026-04-05 02:13:49
Description: Musixmatch metadata enricher (matcher.track.get by Spotify track ID).
"""
from typing import Optional
from loguru import logger
from .base import BaseEnricher
from ..authenticators.musixmatch import MusixmatchAuthenticator
from ..models import TrackMeta
# Musixmatch desktop API endpoint (matcher.track.get). The enricher below
# requests it with a ``track_spotify_id`` parameter.
_MUSIXMATCH_TRACK_MATCH_URL = (
"https://apic-desktop.musixmatch.com/ws/1.1/matcher.track.get"
)
class MusixmatchSpotifyEnricher(BaseEnricher):
    """Look a track up on Musixmatch by its Spotify track ID.

    Supplies title, artist, album, and length from the matched record.
    """

    # Track field -> key of the corresponding text value in the response.
    _TEXT_FIELDS = (
        ("title", "track_name"),
        ("artist", "artist_name"),
        ("album", "album_name"),
    )

    def __init__(self, auth: MusixmatchAuthenticator) -> None:
        """Keep the authenticator used to issue requests."""
        self.auth = auth

    @property
    def name(self) -> str:
        """Identifier of this enricher."""
        return "musixmatch"

    @property
    def provides(self) -> set[str]:
        """Track fields this enricher may supply."""
        return {"title", "artist", "album", "length"}

    async def enrich(self, track: TrackMeta) -> Optional[dict]:
        """Return the metadata Musixmatch reports for ``track.trackid``.

        Returns ``None`` when the track has no ID, the request fails or
        yields no data, or the response carries no usable track record.
        """
        if not track.trackid:
            return None

        logger.debug(f"Musixmatch enricher: looking up trackid={track.trackid}")
        try:
            data = await self.auth.get_json(
                _MUSIXMATCH_TRACK_MATCH_URL,
                {"track_spotify_id": track.trackid},
            )
        except Exception as e:
            logger.warning(f"Musixmatch enricher: request failed: {e}")
            return None
        if data is None:
            return None

        message = data.get("message", {})
        body = message.get("body")
        if isinstance(body, dict):
            t = body.get("track")
        else:
            t = None
        if not isinstance(t, dict):
            logger.debug(
                f"Musixmatch enricher: no track data for trackid={track.trackid}"
            )
            return None

        updates: dict = {}
        for field, key in self._TEXT_FIELDS:
            text = t.get(key)
            if isinstance(text, str) and text:
                updates[field] = text

        raw_length = t.get("track_length")
        if isinstance(raw_length, int) and raw_length > 0:
            # Scaled by 1000; presumably seconds -> milliseconds (confirm
            # against the Musixmatch response format).
            updates["length"] = raw_length * 1000

        if not updates:
            return None
        logger.debug(f"Musixmatch enricher: filled {list(updates.keys())}")
        return updates