feat: add metadata enrichers & refactor

This commit is contained in:
2026-03-31 06:08:16 +02:00
parent d76b25e250
commit 4e83e6be15
19 changed files with 363 additions and 60 deletions
+39
View File
@@ -0,0 +1,39 @@
"""
Author: Uyanide pywang0608@foxmail.com
Date: 2026-03-31 06:09:11
Description: Metadata enrichment pipeline
"""
from loguru import logger
from .base import BaseEnricher
from .audio_tag import AudioTagEnricher
from .file_name import FileNameEnricher
from ..models import TrackMeta
# Registry of enricher instances, iterated by enrich_track in list order.
# Order is priority: enrich_track only applies a value to a field that is
# still None, so whatever an earlier enricher supplies is never replaced by
# a later one.
_ENRICHERS: list[BaseEnricher] = [
    AudioTagEnricher(),
    FileNameEnricher(),
]
def enrich_track(track: TrackMeta) -> TrackMeta:
    """Run all enrichers and return a track with missing fields filled in.

    Enrichers are run in ``_ENRICHERS`` order. Each one sees the cumulative
    state (earlier enrichers' results are already applied). A value is only
    applied when all of the following hold:

    - the value itself is not ``None``;
    - the key names an attribute that actually exists on the track;
    - that attribute is currently ``None``.

    An enricher that raises is logged and skipped; it never aborts the
    pipeline.

    Args:
        track: The track to enrich. It is not mutated; updated copies are
            produced via ``model_copy``.

    Returns:
        The original track if nothing could be added, otherwise a copy with
        the newly discovered fields set.
    """
    for enricher in _ENRICHERS:
        try:
            result = enricher.enrich(track)
        except Exception as e:
            # Best-effort pipeline: one broken source must not block the rest.
            logger.warning(f"Enricher {enricher.name} failed: {e}")
            continue
        if not result:
            continue

        updates = {}
        for field, value in result.items():
            if value is None:
                continue
            if not hasattr(track, field):
                # model_copy(update=...) does not validate its input, so an
                # unknown key would be attached to the copy silently. Drop it
                # here instead of treating "no such attribute" as "missing".
                logger.debug(
                    f"Enricher {enricher.name} returned unknown field {field!r}; ignored"
                )
                continue
            # Only apply fields that are still None
            if getattr(track, field) is None:
                updates[field] = value

        if updates:
            track = track.model_copy(update=updates)
    return track
+78
View File
@@ -0,0 +1,78 @@
"""
Author: Uyanide pywang0608@foxmail.com
Date: 2026-03-31 06:11:27
Description: Enricher that reads metadata from audio file tags (mutagen)
"""
from typing import Optional
from loguru import logger
from mutagen._file import File, FileType
from .base import BaseEnricher
from ..models import TrackMeta
from ..lrc import get_audio_path
class AudioTagEnricher(BaseEnricher):
    """Fill title, artist, album and length from tags embedded in a local audio file."""

    @property
    def name(self) -> str:
        return "audio-tag"

    def enrich(self, track: TrackMeta) -> Optional[dict]:
        """Read the file behind ``track.url`` and report fields the track lacks.

        Returns a dict containing any of ``title``, ``artist``, ``album``
        (strings) and ``length`` (integer milliseconds), or ``None`` when the
        track is not local, the file cannot be resolved or parsed, or nothing
        new was found.
        """
        if not (track.is_local and track.url):
            return None

        path = get_audio_path(track.url, ensure_exists=True)
        if not path:
            return None

        try:
            audio = File(path)
        except Exception as e:
            logger.debug(f"AudioTag: failed to read {path}: {e}")
            return None
        if audio is None:
            return None

        # Per field: candidate tag names (vorbis comments, ID3, MP4).
        tag_keys = (
            ("title", ("title", "TIT2", "\xa9nam")),
            ("artist", ("artist", "TPE1", "\xa9ART")),
            ("album", ("album", "TALB", "\xa9alb")),
        )

        found: dict = {}
        for field, keys in tag_keys:
            value = _first_tag(audio, *keys)
            if value and not getattr(track, field):
                found[field] = value

        if not track.length:
            info = audio.info
            if info and hasattr(info, "length"):
                # info.length is multiplied by 1000 and truncated to an int.
                millis = int(info.length * 1000)
                if millis > 0:
                    found["length"] = millis

        if not found:
            return None
        logger.debug(f"AudioTag: enriched fields: {list(found.keys())}")
        return found
def _first_tag(audio: "FileType", *keys: str) -> Optional[str]:
    """Return the first non-empty, stripped tag value found among ``keys``.

    Keys are tried in the order given. A key is skipped when it is absent,
    when the tag container rejects it as invalid for its format, or when its
    value is empty or whitespace-only.

    Args:
        audio: A parsed audio file exposing a mapping-like ``tags`` attribute
            (may be ``None`` or empty).
        *keys: Candidate tag names, highest priority first.

    Returns:
        The stripped string value, or ``None`` if no key yields one.
    """
    tags = audio.tags
    if not tags:
        return None

    for key in keys:
        try:
            val = tags.get(key)
        except (KeyError, ValueError):
            # Some containers raise instead of reporting a miss when a key is
            # illegal in their format (e.g. a non-ASCII MP4 atom name looked
            # up in a Vorbis comment block). Treat that as "not present" so
            # one foreign key cannot abort the whole lookup.
            continue
        if val is None:
            continue

        # Frame-style values (ID3 text frames) keep their strings in a
        # ``text`` list; stringifying the frame itself would glue multiple
        # values together, so unwrap to the list first.
        text = getattr(val, "text", None)
        if isinstance(text, list):
            val = text

        # List-valued tags: use the first entry.
        if isinstance(val, list):
            val = val[0] if val else None
        if not val:
            continue

        cleaned = str(val).strip()
        if cleaned:
            return cleaned
        # Whitespace-only value: fall through to the next candidate key.
    return None
+31
View File
@@ -0,0 +1,31 @@
"""
Author: Uyanide pywang0608@foxmail.com
Date: 2026-03-31 06:08:16
Description: Base class for metadata enrichers
"""
from abc import ABC, abstractmethod
from typing import Optional
from ..models import TrackMeta
class BaseEnricher(ABC):
    """Interface for one metadata source that can fill gaps in a TrackMeta.

    An implementation looks at the track and reports, as a mapping of field
    name to value, whatever it is able to supply. Only fields that are
    currently ``None`` on the track will actually be applied.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Short identifier for this enricher."""

    @abstractmethod
    def enrich(self, track: TrackMeta) -> Optional[dict]:
        """Return ``{field_name: value}`` for the fields this enricher can fill.

        Return ``None`` or an empty dict when there is nothing to contribute.
        """
+83
View File
@@ -0,0 +1,83 @@
"""
Author: Uyanide pywang0608@foxmail.com
Date: 2026-03-31 06:08:44
Description: Enricher that parses metadata from the audio file path
"""
import re
from typing import Optional
from loguru import logger
from .base import BaseEnricher
from ..models import TrackMeta
from ..lrc import get_audio_path
# Leading track-number prefix: 1-3 digits at the very start of the string,
# followed by one or more whitespace, "." or "-" characters,
# e.g. "01 - ", "01. ", "1 - ".
# A bare number with no trailing separator (e.g. "01") does NOT match.
_TRACK_NUM_RE = re.compile(r"^\d{1,3}[\s.\-]+")
class FileNameEnricher(BaseEnricher):
    """Derive artist / title / album from the file path when tags are unavailable.

    Heuristics (applied to the stem of the filename, after a leading track
    number such as "01 - " or "01. " has been removed):
    - "Artist - Title"       → artist, title
    - "01 - Title"           → title only
    - "01 - Artist - Title"  → artist, title
    - "Title"                → title only

    If artist is still missing after parsing the filename, the grandparent
    directory name is used as a guess, and the parent directory as the album
    (common layout: ``Artist/Album/track``).
    """

    @property
    def name(self) -> str:
        return "file-name"

    def enrich(self, track: TrackMeta) -> Optional[dict]:
        """Guess missing fields from the path behind ``track.url``.

        Returns a dict with any of ``title``, ``artist``, ``album``, or
        ``None`` when the track is not local, the path cannot be resolved,
        or nothing could be guessed. Fields already set on the track are
        never included.
        """
        if not track.is_local or not track.url:
            return None
        audio_path = get_audio_path(track.url, ensure_exists=False)
        if not audio_path:
            return None

        updates: dict = {}

        # Strip the track number from the WHOLE stem before splitting.
        # Splitting first leaves a bare "01" on the left, which the pattern
        # cannot match (it requires a trailing separator), so the track
        # number would be reported as the artist.
        # Trade-off: a 1-3 digit artist name ("311 - Down") is read as a
        # track number, which is what the documented heuristic asks for.
        stem = _TRACK_NUM_RE.sub("", audio_path.stem).strip()

        if " - " in stem:
            # Try "Artist - Title" split
            left, right = (part.strip() for part in stem.split(" - ", 1))
            if left and right:
                if not track.artist:
                    updates["artist"] = left
                if not track.title:
                    updates["title"] = right
            elif right:
                # Nothing usable before the separator → right is the title
                if not track.title:
                    updates["title"] = right
        elif stem and not track.title:
            # No separator: whatever remains after the track number is the title
            updates["title"] = stem

        # Use parent directory as artist fallback
        # Typical layout: /Music/Artist/Album/01 - Track.flac
        if not track.artist and "artist" not in updates:
            parents = audio_path.parents
            if len(parents) >= 2:
                album_dir = parents[0].name
                artist_dir = parents[1].name
                # An empty name means we reached the filesystem root.
                if artist_dir and artist_dir not in (".", "/"):
                    updates["artist"] = artist_dir
                    if not track.album and album_dir and album_dir != artist_dir:
                        updates["album"] = album_dir

        if updates:
            logger.debug(f"FileName: enriched fields: {list(updates.keys())}")
        return updates or None