feat: add exact metadata match for cache search in CacheSearchFetcher
This commit is contained in:
@@ -12,3 +12,4 @@ wheels/
|
|||||||
.env
|
.env
|
||||||
.claude
|
.claude
|
||||||
.vscode
|
.vscode
|
||||||
|
.ruff_cache
|
||||||
|
|||||||
+48
-2
@@ -16,7 +16,9 @@ from .config import DB_PATH, DURATION_TOLERANCE_MS
|
|||||||
from .models import TrackMeta, LyricResult, CacheStatus
|
from .models import TrackMeta, LyricResult, CacheStatus
|
||||||
|
|
||||||
# Punctuation to strip for fuzzy matching (ASCII + common fullwidth)
|
# Punctuation to strip for fuzzy matching (ASCII + common fullwidth)
|
||||||
_PUNCT_RE = re.compile(r"[~!@#$%^&*()_+\-=\[\]{}|;:'\",.<>?/\\`~!@#$%^&*()_+-=【】{}|;:'",。<>?/\`]")
|
_PUNCT_RE = re.compile(
|
||||||
|
r"[~!@#$%^&*()_+\-=\[\]{}|;:'\",.<>?/\\`~!@#$%^&*()_+-=【】{}|;:'",。<>?/\`]"
|
||||||
|
)
|
||||||
_SPACE_RE = re.compile(r"\s+")
|
_SPACE_RE = re.compile(r"\s+")
|
||||||
|
|
||||||
|
|
||||||
@@ -258,6 +260,45 @@ class CacheEngine:
|
|||||||
params.append(track.album)
|
params.append(track.album)
|
||||||
return conditions, params
|
return conditions, params
|
||||||
|
|
||||||
|
# Exact cross-source search
|
||||||
|
|
||||||
|
def find_best_positive(self, track: TrackMeta) -> Optional[LyricResult]:
    """Find the best positive (synced/unsynced) cache entry for *track*.

    The lookup reuses the exact-match conditions built by
    ``self._track_where`` and adds no source filter, so rows written by
    any source are eligible. Only rows whose status is synced or unsynced
    and that have not expired (``expires_at`` is NULL or in the future)
    are considered. A synced row is preferred over an unsynced one.

    Args:
        track: Metadata of the track to look up.

    Returns:
        A ``LyricResult`` whose ``source`` is ``"cache-search"``, or
        ``None`` when ``_track_where`` yields no conditions or no
        matching row exists.
    """
    # Local import keeps this method self-contained; sqlite3's own context
    # manager only commits/rolls back and does NOT close the connection.
    from contextlib import closing

    conditions, params = self._track_where(track)
    if not conditions:
        # Nothing to match on; refuse to run an unconstrained query.
        return None

    synced = CacheStatus.SUCCESS_SYNCED.value
    unsynced = CacheStatus.SUCCESS_UNSYNCED.value

    conditions.append("status IN (?, ?)")
    params.extend([synced, unsynced])

    # Bind the timestamp as an integer so the comparison is numeric
    # regardless of the column's declared affinity.
    conditions.append("(expires_at IS NULL OR expires_at > ?)")
    params.append(int(time.time()))

    where = " AND ".join(conditions)
    # Only fixed condition fragments are interpolated; every value is bound.
    query = (
        f"SELECT status, lyrics FROM cache WHERE {where} "
        "ORDER BY CASE status WHEN ? THEN 0 ELSE 1 END LIMIT 1"
    )

    with closing(sqlite3.connect(self.db_path)) as conn:
        conn.row_factory = sqlite3.Row
        # The trailing parameter feeds the ORDER BY so synced rows sort first.
        row = conn.execute(query, [*params, synced]).fetchone()

    if row is None:
        return None

    return LyricResult(
        status=CacheStatus(row["status"]),
        lyrics=row["lyrics"],
        source="cache-search",
    )
|
||||||
|
|
||||||
# Fuzzy search
|
# Fuzzy search
|
||||||
|
|
||||||
def search_by_meta(
|
def search_by_meta(
|
||||||
@@ -318,7 +359,12 @@ class CacheEngine:
|
|||||||
else:
|
else:
|
||||||
# No duration info in cache — still a candidate but lower priority
|
# No duration info in cache — still a candidate but lower priority
|
||||||
scored.append((DURATION_TOLERANCE_MS, m))
|
scored.append((DURATION_TOLERANCE_MS, m))
|
||||||
scored.sort(key=lambda x: (x[0], x[1].get("status") != CacheStatus.SUCCESS_SYNCED.value))
|
scored.sort(
|
||||||
|
key=lambda x: (
|
||||||
|
x[0],
|
||||||
|
x[1].get("status") != CacheStatus.SUCCESS_SYNCED.value,
|
||||||
|
)
|
||||||
|
)
|
||||||
matches = [m for _, m in scored]
|
matches = [m for _, m in scored]
|
||||||
|
|
||||||
return matches
|
return matches
|
||||||
|
|||||||
@@ -31,6 +31,13 @@ class CacheSearchFetcher(BaseFetcher):
|
|||||||
logger.debug("Cache-search: skipped — no title")
|
logger.debug("Cache-search: skipped — no title")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
# Fast path: exact metadata match (artist+title+album), single SQL query
|
||||||
|
exact = self._cache.find_best_positive(track)
|
||||||
|
if exact:
|
||||||
|
logger.info(f"Cache-search: exact hit ({exact.status.value})")
|
||||||
|
return exact
|
||||||
|
|
||||||
|
# Slow path: fuzzy cross-album search
|
||||||
matches = self._cache.search_by_meta(
|
matches = self._cache.search_by_meta(
|
||||||
artist=track.artist,
|
artist=track.artist,
|
||||||
title=track.title,
|
title=track.title,
|
||||||
@@ -55,7 +62,7 @@ class CacheSearchFetcher(BaseFetcher):
|
|||||||
|
|
||||||
status = CacheStatus(best["status"])
|
status = CacheStatus(best["status"])
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Cache-search: hit from [{best.get('source')}] "
|
f"Cache-search: fuzzy hit from [{best.get('source')}] "
|
||||||
f"album={best.get('album')!r} ({status.value})"
|
f"album={best.get('album')!r} ({status.value})"
|
||||||
)
|
)
|
||||||
return LyricResult(
|
return LyricResult(
|
||||||
|
|||||||
Reference in New Issue
Block a user